Learner-Correctness (Learner-Correctness)

{-# OPTIONS --safe #-}
-- ----------------------------------
-- A sufficient condition for correctness and run-time
-- of learning algorithms
-- ----------------------------------
open import Level

open import Data.Product as Σ
open import Data.Sum as ⊎
open import Data.Nat as ℕ hiding (_⊔_)
open import Data.Nat.Properties as ℕ
open import Data.Unit
open import Data.Empty
open import Function.Base
open import Relation.Binary.PropositionalEquality
open import Relation.Nullary.Negation


open import Game-Basics
import Game-Definitions as Defs

module Learner-Correctness {ℓd : Level}
                           (𝔻 : Set ℓd)
                           (F : Game-Type)
                           (Sem : Defs.GameSemantics 𝔻 F)
                           where

open import Game-Definitions 𝔻 F
open GameSemantics Sem

record Learner-Stepping {ℓr : Level} {R : Set ℓr} {ℓc : Level} {C : Set ℓc} (c : Learnerδ R C) (q₀ : C) (bound : 𝔻 → ℕ): Set (Level.suc ℓd ⊔ Level.suc ℓc ⊔ ℓr) where
  -- Sufficient criteria for the correctness of a learning algorithm.
  field
    -- the 'bound d' provides the number of queries required by the learner to correctly
    -- guess the secret d. So the 'bound d' also counts the final accepting
    -- query to the teacher. E.g.
    -- For example, when playing the number guessing game
    -- on the domain [1,2,3], then the valid bounds for the usual bisecting
    -- learner are:
    -- 
    --    bound(1) = 2
    --    bound(2) = 1
    --    bound(3) = 2
    --
    -- The reason for that is that the learner starts with guessing '2', so
    -- if the hidden number is '2', then it only takes one query. After that
    -- first query, the learner already *knows* the answer ('1' if teacher
    -- responded too-high and '3' if the teacher responded too-low), but it
    -- takes a second query (with precisely that number) to guess the number
    -- correctly; thus, bound(1) = bound(3) = 2.

    -- the state keep track of the time that has passed, that is, the number
    -- of queries performed by the learner so far.
    ticks : C → ℕ
    -- in a state, we only allow some secrets; the others
    -- have been excluded already by the previous queries
    allows : C → (𝔻 → Set ℓc)
    q₀-correct : ∀ sec → allows q₀ sec -- the initial state did not exclude any secrets yet

    -- if a secret is still possible in state q, then the clock is
    -- properly below the claimed time bound. It is a proper '<' because even if
    -- the learner knows that 'd' is correct, it takes one more query to correctly guess it
    ticks-below-bound : ∀ q sec → allows q sec → ticks q < bound sec

  -- if a learner state allows a secret d,
  -- and if the teacher's response still allows d,
  -- then the successor q' of q must still allowdn
  -- and must have an increased tick counter.
  -- we describe it by the followong property

  q'-correct : 𝔻 → ℕ → (𝔻 → Set) × C → Set ℓc
  q'-correct d ticks-q (_∈response , q') = d ∈response → allows q' d × ticks-q < ticks q'
  
  field
    preserve : ∀ {q} {d} → allows q d → ⟦F c q ⟧ ⊧F∀ q'-correct d (ticks q)

open TeacherNaturality F-map

module Correctness {ℓt ℓc ℓr : Level} {T : Set ℓt} {R : Set ℓr} {C : Set ℓc}
                   (teacher : Teacher-on T) (learner : Learner-on R C)
                   (q₀ : C)
                   where
  private
    module teacher = Teacher-on teacher
    module learner = Learner-on learner

  open Game Sem teacher.δ learner.δ

  module learner-correct-proof-details
         (bound : 𝔻 → ℕ)
         (correct : Learner-Stepping learner.δ q₀ bound)
         (n : 𝔻) -- the number of interest
         where
    open Learner-Stepping correct

    game-step-prop' : (l : C) → allows l n → M R T (Σ ((𝔻 → Set) × C) (q'-correct n (ticks l)))
    game-step-prop' l n∈l = teacher.δ (F∀-elim ⟦F learner.δ l ⟧ (preserve n∈l))

    proj₁∘game-step-prop' : (l : C) → (n∈l : allows l n) → (t : T) → game-step-prop (t , l) ≡ M-map proj₁ (game-step-prop' l n∈l) t
    proj₁∘game-step-prop' l n∈l t =
      begin
      game-step-prop (t , l)  ≡⟨ cong (λ - → teacher.δ - t) (proj₁∘F∀elim (preserve n∈l)) ⟨
      teacher.δ (F-map proj₁ (F∀-elim ⟦F learner.δ l ⟧ (preserve n∈l))) t ≡⟨ teacher.natural proj₁ _ t ⟨
      M-map proj₁ (teacher.δ (F∀-elim ⟦F learner.δ l ⟧ (preserve n∈l))) t
      ∎
      where
        open ≡-Reasoning

    main-induction : (steps-remaining : ℕ) (t : T) (l : C)
                     → (allows l n)
                     → bound n ≤ steps-remaining + ticks l -- ticks may grow faster than 1 per query
                     → learner (t , l) finds n within steps-remaining rounds
    main-induction ℕ.zero t l n∈l bound≤tick = ⊥-elim (ℕ.<⇒≱ (ticks-below-bound l n n∈l) bound≤tick)
    main-induction (ℕ.suc s) t l n∈l bound≤tick rewrite (proj₁∘game-step-prop' l n∈l t) with (teacher.δ (F∀-elim ⟦F learner.δ l ⟧ (preserve n∈l)) t)
    ... | (inj₁ _) = tt -- teacher surrenders
    ... | (inj₂ (t' , (_∈response , l') , l'-prop )) = λ n∈response →
      let
        n∈l' , ticks-l<ticks-l' = l'-prop n∈response
        open ≤-Reasoning
        bound[n]≤s+ticks-l' : bound n ≤ s + ticks l'
        bound[n]≤s+ticks-l' =
          begin
          bound n ≤⟨ bound≤tick ⟩
          1 + s + ticks l ≡⟨ ℕ.+-suc s _ ⟨
          s + (1 + ticks l) ≤⟨ +-mono-≤ (≤-reflexive refl) ticks-l<ticks-l' ⟩
          s + ticks l'
          ∎
      in
      main-induction s t' l' n∈l' bound[n]≤s+ticks-l'


  -- Main correctness theorem for learners: The properties in above Learner-Correct
  -- are sufficient for the learner to 'win' every learning game for every hidden
  -- number n.
  learner-correct : (bound : 𝔻 → ℕ)
                   → Learner-Stepping learner.δ q₀ bound
                   → ∀ st₀ n → learner (st₀ , q₀) finds n within (bound n) rounds
  learner-correct bound learner-correct st₀ n =
    main-induction (bound n) st₀ q₀ (q₀-correct n) (m≤m+n (bound n) _)
    where
      open Learner-Stepping learner-correct
      open learner-correct-proof-details bound learner-correct n