⟨ Index | Module Learner-Correctness
{-# OPTIONS --safe #-}
-- ----------------------------------
-- A sufficient condition for correctness and run-time
-- of learning algorithms
-- ----------------------------------
open import Level

open import Data.Product as Σ
open import Data.Sum as 
open import Data.Nat as  hiding (_⊔_)
open import Data.Nat.Properties as 
open import Data.Unit
open import Data.Empty
open import Function.Base
open import Relation.Binary.PropositionalEquality
open import Relation.Nullary.Negation


open import Game-Basics
import Game-Definitions as Defs

module Learner-Correctness {ℓd : Level}
                           (𝔻 : Set ℓd)
                           (F : Game-Type)
                           (Sem : Defs.GameSemantics 𝔻 F)
                           where

open import Game-Definitions 𝔻 F
open GameSemantics Sem

record Learner-Stepping {ℓr : Level} {R : Set ℓr} {ℓc : Level} {C : Set ℓc} (c : Learnerδ R C) (q₀ : C) (bound : 𝔻  ): Set (Level.suc ℓd  Level.suc ℓc  ℓr) where
  -- Sufficient criteria for the correctness of a learning algorithm.
  field
    -- the 'bound d' provides the number of queries required by the learner to correctly
    -- guess the secret d. So the 'bound d' also counts the final accepting
    -- query to the teacher. E.g.
    -- For example, when playing the number guessing game
    -- on the domain [1,2,3], then the valid bounds for the usual bisecting
    -- learner are:
    -- 
    --    bound(1) = 2
    --    bound(2) = 1
    --    bound(3) = 2
    --
    -- The reason for that is that the learner starts with guessing '2', so
    -- if the hidden number is '2', then it only takes one query. After that
    -- first query, the learner already *knows* the answer ('1' if teacher
    -- responded too-high and '3' if the teacher responded too-low), but it
    -- takes a second query (with precisely that number) to guess the number
    -- correctly; thus, bound(1) = bound(3) = 2.

    -- the state keep track of the time that has passed, that is, the number
    -- of queries performed by the learner so far.
    ticks : C  
    -- in a state, we only allow some secrets; the others
    -- have been excluded already by the previous queries
    allows : C  (𝔻  Set ℓc)
    q₀-correct :  sec  allows q₀ sec -- the initial state did not exclude any secrets yet

    -- if a secret is still possible in state q, then the clock is
    -- properly below the claimed time bound. It is a proper '<' because even if
    -- the learner knows that 'd' is correct, it takes one more query to correctly guess it
    ticks-below-bound :  q sec  allows q sec  ticks q < bound sec

  -- if a learner state allows a secret d,
  -- and if the teacher's response still allows d,
  -- then the successor q' of q must still allowdn
  -- and must have an increased tick counter.
  -- we describe it by the followong property

  q'-correct : 𝔻    (𝔻  Set) × C  Set ℓc
  q'-correct d ticks-q (_∈response , q') = d ∈response  allows q' d × ticks-q < ticks q'
  
  field
    preserve :  {q} {d}  allows q d  ⟦F c q  ⊧F∀ q'-correct d (ticks q)

open TeacherNaturality F-map

module Correctness {ℓt ℓc ℓr : Level} {T : Set ℓt} {R : Set ℓr} {C : Set ℓc}
                   (teacher : Teacher-on T) (learner : Learner-on R C)
                   (q₀ : C)
                   where
  private
    module teacher = Teacher-on teacher
    module learner = Learner-on learner

  open Game Sem teacher.δ learner.δ

  module learner-correct-proof-details
         (bound : 𝔻  )
         (correct : Learner-Stepping learner.δ q₀ bound)
         (n : 𝔻) -- the number of interest
         where
    open Learner-Stepping correct

    game-step-prop' : (l : C)  allows l n  M R T (Σ ((𝔻  Set) × C) (q'-correct n (ticks l)))
    game-step-prop' l n∈l = teacher.δ (F∀-elim ⟦F learner.δ l  (preserve n∈l))

    proj₁∘game-step-prop' : (l : C)  (n∈l : allows l n)  (t : T)  game-step-prop (t , l)  M-map proj₁ (game-step-prop' l n∈l) t
    proj₁∘game-step-prop' l n∈l t =
      begin
      game-step-prop (t , l)  ≡⟨ cong  -  teacher.δ - t) (proj₁∘F∀elim (preserve n∈l)) 
      teacher.δ (F-map proj₁ (F∀-elim ⟦F learner.δ l  (preserve n∈l))) t ≡⟨ teacher.natural proj₁ _ t 
      M-map proj₁ (teacher.δ (F∀-elim ⟦F learner.δ l  (preserve n∈l))) t
      
      where
        open ≡-Reasoning

    main-induction : (steps-remaining : ) (t : T) (l : C)
                      (allows l n)
                      bound n  steps-remaining + ticks l -- ticks may grow faster than 1 per query
                      learner (t , l) finds n within steps-remaining rounds
    main-induction ℕ.zero t l n∈l bound≤tick = ⊥-elim (ℕ.<⇒≱ (ticks-below-bound l n n∈l) bound≤tick)
    main-induction (ℕ.suc s) t l n∈l bound≤tick rewrite (proj₁∘game-step-prop' l n∈l t) with (teacher.δ (F∀-elim ⟦F learner.δ l  (preserve n∈l)) t)
    ... | (inj₁ _) = tt -- teacher surrenders
    ... | (inj₂ (t' , (_∈response , l') , l'-prop )) = λ n∈response 
      let
        n∈l' , ticks-l<ticks-l' = l'-prop n∈response
        open ≤-Reasoning
        bound[n]≤s+ticks-l' : bound n  s + ticks l'
        bound[n]≤s+ticks-l' =
          begin
          bound n ≤⟨ bound≤tick 
          1 + s + ticks l ≡⟨ ℕ.+-suc s _ 
          s + (1 + ticks l) ≤⟨ +-mono-≤ (≤-reflexive refl) ticks-l<ticks-l' 
          s + ticks l'
          
      in
      main-induction s t' l' n∈l' bound[n]≤s+ticks-l'


  -- Main correctness theorem for learners: The properties in above Learner-Correct
  -- are sufficient for the learner to 'win' every learning game for every hidden
  -- number n.
  learner-correct : (bound : 𝔻  )
                    Learner-Stepping learner.δ q₀ bound
                     st₀ n  learner (st₀ , q₀) finds n within (bound n) rounds
  learner-correct bound learner-correct st₀ n =
    main-induction (bound n) st₀ q₀ (q₀-correct n) (m≤m+n (bound n) _)
    where
      open Learner-Stepping learner-correct
      open learner-correct-proof-details bound learner-correct n