⟨ Index | Module NatExamples.AdversarialTeacher
{-# OPTIONS --safe #-}
import Level as L
open import Level using (Level)

open import Data.Empty
open import Data.Nat as  hiding (_⊔_)
open import Data.Nat.Properties as 
open import Data.Nat
open import Data.Nat.Logarithm
open import Data.Maybe using (Maybe; just; nothing)
import Data.Maybe as Maybe
open import Data.Product as Σ
open import Data.Sum as 
open import Data.Unit
open import Function.Base
open import Relation.Binary.PropositionalEquality
open import Relation.Binary.Definitions
open import Relation.Unary
open import Relation.Nullary
open import Utils

module NatExamples.AdversarialTeacher where
open import NatLearning

record Interval : Set where
  -- closed intervals on natural numbers.
  -- Hence, they are always non-empty
  constructor [_,+_]
  field
    low : 
    len : 
  high : 
  high = low + len

⟦_⟧ : Interval    Set
 [ low ,+ len ]  x = low  x × x  low + len

record _⊧_ (iv : Interval) (h : ) : Set where
  constructor mk⊧
  module iv = Interval iv
  field
    m : 
    k : 
    low+m≡h : iv.low + m  h
    m+k≡len : m + k  iv.len
  low≤h : iv.low  h
  low≤h = ≤-trans (m≤m+n iv.low m) (≤-reflexive low+m≡h)
  h≤high : h  iv.high
  h≤high = begin
    h ≡⟨ low+m≡h 
    iv.low + m ≤⟨ +-monoʳ-≤ iv.low (m≤m+n m k) 
    iv.low + (m + k) ≡⟨ cong (iv.low +_) m+k≡len 
    iv.low + iv.len
    
    where open ≤-Reasoning
  ⟦iv⟧ :  iv  h
  ⟦iv⟧ = low≤h , h≤high

mk⊧-≤ :  {iv} {h}  Interval.low iv  h  h  Interval.high iv  iv  h
mk⊧-≤ {iv} {h} low≤h h≤high =
  let
    m , low+m≡h = m≤n⇒m+k≡n low≤h
    k , h+k≡low+len = m≤n⇒m+k≡n h≤high
  in
  mk⊧ m k low+m≡h (+-cancelˡ-≡ low (m + k) len (begin
    low + (m + k) ≡⟨ +-assoc low m _ 
    (low + m) + k ≡⟨ cong (_+ k) low+m≡h 
    h + k ≡⟨ h+k≡low+len 
    low + len
    ))
  where
    open Interval iv
    open ≡-Reasoning

_+?≡_ :  n m  (∃[ k ] n + k  m)  m < n
_+?≡_ n m with (n ≤? m)
... | yes n≤m = inj₁ (m≤n⇒m+k≡n n≤m)
... | no n≰m = inj₂ (≰⇒> n≰m)

-- having n options on the left and m options
-- on the right, the an inhabitant of Go-Bigger n m
-- shows the side on which there are more options:
data Go-Bigger :     Set where
  go-left :  {m} {k}  (m>k : m > k)  Go-Bigger m k
  go-right :  {m} {k}  (m<k : m < k)  Go-Bigger m k
  go-either :  {m}  Go-Bigger (1 + m) (1 + m)
  go-nowhere : Go-Bigger 0 0

bisect-cases :  n m  Go-Bigger n m
bisect-cases n m with (<-cmp n m)
bisect-cases n m | tri< n<m _ _ = go-right n<m
bisect-cases n m | tri> _ _ n>m = go-left n>m
bisect-cases ℕ.zero _ | tri≈ _ refl _ = go-nowhere
bisect-cases (ℕ.suc n) _ | tri≈ _ refl _ = go-either

record AT (counter : ) : Set where
  -- the adversarial teacher that has some number
  -- in the interval and can 'defend' for 'counter'
  -- many rounds
  constructor adv-teacher
  field
    interval : Interval
  open Interval interval public
  field
    -- if the teacher wants to answer
    -- 'wrong' for 'counter' many times,
    -- then the interval must contain
    -- at least 2 ^ counter many elements,
    -- which is 1 + len
    2^cnt≤len : 2 ^ counter  1 + len

forget-AT :  {cnt}  AT (suc cnt)  AT cnt
forget-AT {cnt} (adv-teacher iv 2^suc[cnt]≤len) =
  adv-teacher iv (≤-trans (^-monoʳ-≤ 2 (m≤n+m cnt 1)) 2^suc[cnt]≤len)

record _⊆iv_ (iv' : Interval) (iv : Interval) : Set where
  module iv = Interval iv
  module iv' = Interval iv'
  open Interval iv' public
  field
    low≤ : iv.low  iv'.low
    high≤ : iv'.high  iv.high

⊆iv-reflexive :  {iv}  iv ⊆iv iv
⊆iv-reflexive {iv} = record { low≤ = ≤-reflexive refl ; high≤ = ≤-reflexive refl }

_⊆t_ :  {k' k}  AT k'  AT k  Set
_⊆t_ t' t = AT.interval t' ⊆iv AT.interval t

⊆iv-preserve-⊧ :  {iv} {iv'}  iv ⊆iv iv'  {h : }  iv  h  iv'  h
⊆iv-preserve-⊧ iv⊆iv' {h} iv⊧h = mk⊧-≤ (≤-trans iv⊆iv'.low≤ iv⊧h.low≤h) (≤-trans iv⊧h.h≤high iv⊆iv'.high≤)
  where
    module iv⊆iv' = _⊆iv_ iv⊆iv'
    module iv⊧h = _⊧_ iv⊧h

mkTooLow :  {iv : Interval} {hyp : }  hyp < Interval.low iv   iv   ⟦WrongNat too-low  hyp
mkTooLow hyp<low {x} (low≤x , _) = ≤-trans hyp<low low≤x

mkTooHigh :  {iv : Interval} {hyp : }  hyp > Interval.high iv   iv   ⟦WrongNat too-high  hyp
mkTooHigh hyp>high {x} (_ , x≤high) = ≤-trans (s≤s x≤high) hyp>high

δ-prop :  {cnt}  (t : AT (suc cnt))  (hyp : )  Σ[ (wn , t')  WrongNat × AT cnt ] ( AT.interval t'   ⟦WrongNat wn  hyp × t' ⊆t t)
δ-prop {cnt} t@(adv-teacher [ low ,+ len ] 2^cnt≤len) hyp with (low +?≡ hyp)
... | inj₂ hyp<low = (too-low , forget-AT t) , mkTooLow hyp<low , ⊆iv-reflexive
... | inj₁ (m , refl) with (m +?≡ len)
...   | inj₂ len<m = (too-high , forget-AT t) , mkTooHigh (+-monoʳ-< low len<m) , ⊆iv-reflexive
...   | inj₁ (k , refl) with bisect-cases m k
...      | go-nowhere = ⊥-elim (≤⇒≯ 2^cnt≤len (begin
                          2 ≡⟨⟩
                          2 * 1 ≤⟨ *-monoʳ-≤ 2 (m^n>0 2 cnt) 
                          2 * (2 ^ cnt) ≡⟨⟩
                          2 ^ cnt + (2 ^ cnt + 0)
                        ))
                        where open ≤-Reasoning
...      | go-left m>k =
           let
             instance
               m-nz : NonZero m
               m-nz = >m-nonZero m>k
           in
           (too-high , adv-teacher [ low ,+ pred m ] (begin
                       2 ^ cnt ≤⟨ bisect-sum {cnt} (≤-trans 2^cnt≤len (≤-reflexive (sym (+-suc m k)))) m>k 
                       m ≡⟨ suc-pred m 
                       suc (pred m)
                       ))
           , mkTooHigh (begin
                        suc (low + pred m) ≡⟨ +-suc low _ 
                        low + suc (pred m) ≡⟨ cong (low +_) (suc-pred m) 
                        low + m
                        )
           , record
             { low≤ = ≤-reflexive refl
             ; high≤ = +-monoʳ-≤ low (≤-trans pred-≤ (m≤m+n m k))
             }
           where open ≤-Reasoning
...      | go-right m<k =
           let
             instance
               k-nz : NonZero k
               k-nz = >m-nonZero m<k
           in
           (too-low , adv-teacher [ (low + (1 + m)) ,+ pred k ] (begin
                       2 ^ cnt ≤⟨ bisect-sum {cnt} (≤-trans 2^cnt≤len (≤-reflexive (+-comm (suc m) k))) m<k 
                       k ≡⟨ suc-pred k 
                       suc (pred k)
                       ))
           , mkTooLow (≤-reflexive (sym (+-suc low m)))
           , record
             { low≤ = m≤m+n low _
             ; high≤ = begin
                     low + suc m + (k  1) ≡⟨ +-assoc low _ _ 
                     low + (suc m + (k  1)) ≡⟨ cong (low +_) (+-suc m _) 
                     low + (m + suc (k  1)) ≡⟨ cong  -  low + (m + -)) (suc-pred k) 
                     low + (m + k)
                     
             }
           where open ≤-Reasoning
...      | go-either {m'} =
           let
             2m1m :  {m}  2+ (m + suc m)  1 + 2 * (suc m)
             2m1m {m} = begin
               1 + (suc (m + suc m)) ≡⟨⟩
               1 + (suc m + suc m) ≡⟨ cong suc (2*n≡n+n (suc m)) 
               1 + 2 * suc m
               

             tmp : 2 ^ (suc cnt)  1 + 2 * (suc m')
             tmp = begin
               2 ^ (suc cnt) ≤⟨ 2^cnt≤len  -- 2^cnt≤len ⟩
               2+ (m' + suc m') ≤⟨ 2m1m 
               1 + 2 * (suc m')
               
           in
           (too-high , adv-teacher [ low ,+ pred m ] (2*n≤2*k+1⇒n≤k (2 ^ cnt) m (subst  -  2 ^ (suc cnt)  suc -) (sym (2*n≡n+n m)) 2^cnt≤len))) -- (2^c≤1+k cnt m tmp ))
           , (mkTooHigh (≤-reflexive (sym (+-suc low m')))
           , record
             { low≤ = ≤-reflexive refl
             ; high≤ = +-monoʳ-≤ low (≤-trans (m≤m+n m' m) pred-≤)
             })
           where open ≤-Reasoning

-- if we have a teacher who can defend for (ℕ.suc cnt) many rounds,
-- this implies that the δ function is always 'just' and never 'nothing':

ΣAT : Set
ΣAT = (Σ[ cnt   ] (AT cnt))

δ-f : ΣAT    Maybe (WrongNat × ΣAT)
δ-f (zero , t) hyp = nothing
δ-f (suc n , t) hyp = just
    (proj₁ (proj₁ (δ-prop t hyp)) , (n , proj₂ (proj₁ (δ-prop t hyp))))

δ-nat : Teacherδ ΣAT
δ-nat = mkTeacherδ δ-f
-- Explicit definition:
-- δ-nat (guess hyp ret next) (zero , t) = inj₁ ret
-- δ-nat (guess hyp ret next) (suc r , t) = inj₂ ((r , proj₂ (proj₁ (δ-prop t hyp))) , next (proj₁ (proj₁ (δ-prop t hyp))))

δ-is-nat : TeacherIsNatural δ-nat
δ-is-nat = mkTeacherδ-IsNatural δ-f

module Properties {ℓr : Level} (R : Set ℓr)  {ℓc : Level} (C : Set ℓc) (learner : Learnerδ R C) where
  open Game NatLearning-Semantics δ-nat learner
  
  game-lower-bound :  r (t : AT r) c 
                       ∃[ sec ]
                         (AT.interval t)  sec
                         × still-possible ((r , t) , c) r sec 
  game-lower-bound ℕ.zero (adv-teacher [ low ,+ len ] _) c =
    low , (mk⊧ 0 len (+-identityʳ _) refl) , tt
  game-lower-bound (ℕ.suc r) t c with (learner c)
  ... | guess hyp ret next with (δ-prop t hyp)
  ... | (wn , t') , ⟦t'⟧⊆⟦wn⟧ , t'⊆t =
    let
      sec , t'⊧sec , still-poss = game-lower-bound r t' (next wn)
    in
    sec , ⊆iv-preserve-⊧ t'⊆t t'⊧sec , ⟦t'⟧⊆⟦wn⟧ {sec} (_⊧_.⟦iv⟧ t'⊧sec) , still-poss
    where
      query : F R ((  Set) × C)
      query = ⟦F learner c 

adversarial-teacher : Interval  NatTeacher L.0ℓ
adversarial-teacher iv = record
  { T = ΣAT
  ; st₀ = ⌊log₂ iv.len  , adv-teacher iv (2^⌊log₂n⌋≤1+n iv.len)
  ; δ = δ-f
  }
  where
    module iv = Interval iv

-- Main Theorem: Lower bound for Nat learning:
-- For every interval of length m, we can construct a teacher,
-- such that no client can win the learning game
-- within ⌊log₂ m ⌋ rounds.
-- Concretely, we phrase this as: for every client, all guesses
-- within ⌊log₂ m ⌋ were wrong and there is still some
-- number (in the interval) that is consistent with all
-- the answers by the teacher
thm-adversarial-teacher :  {ℓc : Level} interval 
   ∃[ teacher ]  (learner : Learner ℓc)
     ∃[ sec ] interval  sec
      × learner can't-learn sec from teacher within ⌊log₂ (Interval.len interval)  rounds

thm-adversarial-teacher iv =
  record { T = ΣAT ; st₀ = st₀ ; δ = δ-f }
  , λ learner 
    let
        module learner = Learner learner
        open Properties learner.R learner.C learner.δ
    in
    game-lower-bound ⌊log₂ iv.len  t learner.q₀
  where
    module iv = Interval iv
    t = adv-teacher iv (2^⌊log₂n⌋≤1+n iv.len)
    st₀ : ΣAT
    st₀ = ⌊log₂ iv.len  , t

-- Steve Ballmer describes the following job interview question:
-- "I'm thinking of a number between 1 and 100. You can guess. After each guess, I
-- will tell you wether you're 'high' or 'low'."
-- see: https://youtu.be/svCYbkS0Sjk
-- In above video, it takes 7 question for the interviewer to guess the number correct.
-- And indeed, we can show, that the teacher can enforce it that the learner is not possible
-- to guess the number correctly within 6 guesses:
example-adversarial-100 :  {ℓc : Level} 
   ∃[ teacher ]  (learner : Learner ℓc)
     ∃[ sec ] [ 1 ,+ 99 ]  sec
      × learner can't-learn sec from teacher within 6 rounds

example-adversarial-100 = thm-adversarial-teacher [ 1 ,+ 99 ]