AdversarialTeacher (NatExamples.AdversarialTeacher)

{-# OPTIONS --safe #-}
import Level as L
open import Level using (Level)

open import Data.Empty
open import Data.Nat as ℕ hiding (_⊔_)
open import Data.Nat.Properties as ℕ
open import Data.Nat
open import Data.Nat.Logarithm
open import Data.Maybe using (Maybe; just; nothing)
import Data.Maybe as Maybe
open import Data.Product as Σ
open import Data.Sum as ⊎
open import Data.Unit
open import Function.Base
open import Relation.Binary.PropositionalEquality
open import Relation.Binary.Definitions
open import Relation.Unary
open import Relation.Nullary
open import Utils

module NatExamples.AdversarialTeacher where
open import NatLearning

record Interval : Set where
  -- closed intervals on natural numbers.
  -- Hence, they are always non-empty
  constructor [_,+_]
  field
    low : ℕ
    len : ℕ
  high : ℕ
  high = low + len

⟦_⟧ : Interval → ℕ → Set
⟦ [ low ,+ len ] ⟧ x = low ≤ x × x ≤ low + len

record _⊧_ (iv : Interval) (h : ℕ) : Set where
  constructor mk⊧
  module iv = Interval iv
  field
    m : ℕ
    k : ℕ
    low+m≡h : iv.low + m ≡ h
    m+k≡len : m + k ≡ iv.len
  low≤h : iv.low ≤ h
  low≤h = ≤-trans (m≤m+n iv.low m) (≤-reflexive low+m≡h)
  h≤high : h ≤ iv.high
  h≤high = begin
    h ≡⟨ low+m≡h ⟨
    iv.low + m ≤⟨ +-monoʳ-≤ iv.low (m≤m+n m k) ⟩
    iv.low + (m + k) ≡⟨ cong (iv.low +_) m+k≡len ⟩
    iv.low + iv.len
    ∎
    where open ≤-Reasoning
  ⟦iv⟧ : ⟦ iv ⟧ h
  ⟦iv⟧ = low≤h , h≤high

mk⊧-≤ : ∀ {iv} {h} → Interval.low iv ≤ h → h ≤ Interval.high iv → iv ⊧ h
mk⊧-≤ {iv} {h} low≤h h≤high =
  let
    m , low+m≡h = m≤n⇒m+k≡n low≤h
    k , h+k≡low+len = m≤n⇒m+k≡n h≤high
  in
  mk⊧ m k low+m≡h (+-cancelˡ-≡ low (m + k) len (begin
    low + (m + k) ≡⟨ +-assoc low m _ ⟨
    (low + m) + k ≡⟨ cong (_+ k) low+m≡h ⟩
    h + k ≡⟨ h+k≡low+len ⟩
    low + len
    ∎))
  where
    open Interval iv
    open ≡-Reasoning

_+?≡_ : ∀ n m → (∃[ k ] n + k ≡ m) ⊎ m < n
_+?≡_ n m with (n ≤? m)
... | yes n≤m = inj₁ (m≤n⇒m+k≡n n≤m)
... | no n≰m = inj₂ (≰⇒> n≰m)

-- having n options on the left and m options
-- on the right, the an inhabitant of Go-Bigger n m
-- shows the side on which there are more options:
data Go-Bigger : ℕ → ℕ → Set where
  go-left : ∀ {m} {k} → (m>k : m > k) → Go-Bigger m k
  go-right : ∀ {m} {k} → (m<k : m < k) → Go-Bigger m k
  go-either : ∀ {m} → Go-Bigger (1 + m) (1 + m)
  go-nowhere : Go-Bigger 0 0

bisect-cases : ∀ n m → Go-Bigger n m
bisect-cases n m with (<-cmp n m)
bisect-cases n m | tri< n<m _ _ = go-right n<m
bisect-cases n m | tri> _ _ n>m = go-left n>m
bisect-cases ℕ.zero _ | tri≈ _ refl _ = go-nowhere
bisect-cases (ℕ.suc n) _ | tri≈ _ refl _ = go-either

record AT (counter : ℕ) : Set where
  -- the adversarial teacher that has some number
  -- in the interval and can 'defend' for 'counter'
  -- many rounds
  constructor adv-teacher
  field
    interval : Interval
  open Interval interval public
  field
    -- if the teacher wants to answer
    -- 'wrong' for 'counter' many times,
    -- then the interval must contain
    -- at least 2 ^ counter many elements,
    -- which is 1 + len
    2^cnt≤len : 2 ^ counter ≤ 1 + len

forget-AT : ∀ {cnt} → AT (suc cnt) → AT cnt
forget-AT {cnt} (adv-teacher iv 2^suc[cnt]≤len) =
  adv-teacher iv (≤-trans (^-monoʳ-≤ 2 (m≤n+m cnt 1)) 2^suc[cnt]≤len)

record _⊆iv_ (iv' : Interval) (iv : Interval) : Set where
  module iv = Interval iv
  module iv' = Interval iv'
  open Interval iv' public
  field
    low≤ : iv.low ≤ iv'.low
    high≤ : iv'.high ≤ iv.high

⊆iv-reflexive : ∀ {iv} → iv ⊆iv iv
⊆iv-reflexive {iv} = record { low≤ = ≤-reflexive refl ; high≤ = ≤-reflexive refl }

_⊆t_ : ∀ {k' k} → AT k' → AT k → Set
_⊆t_ t' t = AT.interval t' ⊆iv AT.interval t

⊆iv-preserve-⊧ : ∀ {iv} {iv'} → iv ⊆iv iv' → {h : ℕ} → iv ⊧ h → iv' ⊧ h
⊆iv-preserve-⊧ iv⊆iv' {h} iv⊧h = mk⊧-≤ (≤-trans iv⊆iv'.low≤ iv⊧h.low≤h) (≤-trans iv⊧h.h≤high iv⊆iv'.high≤)
  where
    module iv⊆iv' = _⊆iv_ iv⊆iv'
    module iv⊧h = _⊧_ iv⊧h

mkTooLow : ∀ {iv : Interval} {hyp : ℕ} → hyp < Interval.low iv → ⟦ iv ⟧ ⊆ ⟦WrongNat too-low ⟧ hyp
mkTooLow hyp<low {x} (low≤x , _) = ≤-trans hyp<low low≤x

mkTooHigh : ∀ {iv : Interval} {hyp : ℕ} → hyp > Interval.high iv → ⟦ iv ⟧ ⊆ ⟦WrongNat too-high ⟧ hyp
mkTooHigh hyp>high {x} (_ , x≤high) = ≤-trans (s≤s x≤high) hyp>high

δ-prop : ∀ {cnt} → (t : AT (suc cnt)) → (hyp : ℕ) → Σ[ (wn , t') ∈ WrongNat × AT cnt ] (⟦ AT.interval t' ⟧ ⊆ ⟦WrongNat wn ⟧ hyp × t' ⊆t t)
δ-prop {cnt} t@(adv-teacher [ low ,+ len ] 2^cnt≤len) hyp with (low +?≡ hyp)
... | inj₂ hyp<low = (too-low , forget-AT t) , mkTooLow hyp<low , ⊆iv-reflexive
... | inj₁ (m , refl) with (m +?≡ len)
...   | inj₂ len<m = (too-high , forget-AT t) , mkTooHigh (+-monoʳ-< low len<m) , ⊆iv-reflexive
...   | inj₁ (k , refl) with bisect-cases m k
...      | go-nowhere = ⊥-elim (≤⇒≯ 2^cnt≤len (begin
                          2 ≡⟨⟩
                          2 * 1 ≤⟨ *-monoʳ-≤ 2 (m^n>0 2 cnt) ⟩
                          2 * (2 ^ cnt) ≡⟨⟩
                          2 ^ cnt + (2 ^ cnt + 0)
                        ∎))
                        where open ≤-Reasoning
...      | go-left m>k =
           let
             instance
               m-nz : NonZero m
               m-nz = >m-nonZero m>k
           in
           (too-high , adv-teacher [ low ,+ pred m ] (begin
                       2 ^ cnt ≤⟨ bisect-sum {cnt} (≤-trans 2^cnt≤len (≤-reflexive (sym (+-suc m k)))) m>k ⟩
                       m ≡⟨ suc-pred m ⟨
                       suc (pred m)
                       ∎))
           , mkTooHigh (begin
                        suc (low + pred m) ≡⟨ +-suc low _ ⟨
                        low + suc (pred m) ≡⟨ cong (low +_) (suc-pred m) ⟩
                        low + m
                        ∎)
           , record
             { low≤ = ≤-reflexive refl
             ; high≤ = +-monoʳ-≤ low (≤-trans pred-≤ (m≤m+n m k))
             }
           where open ≤-Reasoning
...      | go-right m<k =
           let
             instance
               k-nz : NonZero k
               k-nz = >m-nonZero m<k
           in
           (too-low , adv-teacher [ (low + (1 + m)) ,+ pred k ] (begin
                       2 ^ cnt ≤⟨ bisect-sum {cnt} (≤-trans 2^cnt≤len (≤-reflexive (+-comm (suc m) k))) m<k ⟩
                       k ≡⟨ suc-pred k ⟨
                       suc (pred k)
                       ∎))
           , mkTooLow (≤-reflexive (sym (+-suc low m)))
           , record
             { low≤ = m≤m+n low _
             ; high≤ = begin
                     low + suc m + (k ∸ 1) ≡⟨ +-assoc low _ _ ⟩
                     low + (suc m + (k ∸ 1)) ≡⟨ cong (low +_) (+-suc m _) ⟨
                     low + (m + suc (k ∸ 1)) ≡⟨ cong (λ - → low + (m + -)) (suc-pred k) ⟩
                     low + (m + k)
                     ∎
             }
           where open ≤-Reasoning
...      | go-either {m'} =
           let
             2m1m : ∀ {m} → 2+ (m + suc m) ≤ 1 + 2 * (suc m)
             2m1m {m} = begin
               1 + (suc (m + suc m)) ≡⟨⟩
               1 + (suc m + suc m) ≡⟨ cong suc (2*n≡n+n (suc m)) ⟨
               1 + 2 * suc m
               ∎

             tmp : 2 ^ (suc cnt) ≤ 1 + 2 * (suc m')
             tmp = begin
               2 ^ (suc cnt) ≤⟨ 2^cnt≤len ⟩ -- 2^cnt≤len ⟩
               2+ (m' + suc m') ≤⟨ 2m1m ⟩
               1 + 2 * (suc m')
               ∎
           in
           (too-high , adv-teacher [ low ,+ pred m ] (2*n≤2*k+1⇒n≤k (2 ^ cnt) m (subst (λ - → 2 ^ (suc cnt) ≤ suc -) (sym (2*n≡n+n m)) 2^cnt≤len))) -- (2^c≤1+k cnt m tmp ))
           , (mkTooHigh (≤-reflexive (sym (+-suc low m')))
           , record
             { low≤ = ≤-reflexive refl
             ; high≤ = +-monoʳ-≤ low (≤-trans (m≤m+n m' m) pred-≤)
             })
           where open ≤-Reasoning

-- if we have a teacher who can defend for (ℕ.suc cnt) many rounds,
-- this implies that the δ function is always 'just' and never 'nothing':

ΣAT : Set
ΣAT = (Σ[ cnt ∈ ℕ ] (AT cnt))

δ-f : ΣAT → ℕ → Maybe (WrongNat × ΣAT)
δ-f (zero , t) hyp = nothing
δ-f (suc n , t) hyp = just
    (proj₁ (proj₁ (δ-prop t hyp)) , (n , proj₂ (proj₁ (δ-prop t hyp))))

δ-nat : Teacherδ ΣAT
δ-nat = mkTeacherδ δ-f
-- Explicit definition:
-- δ-nat (guess hyp ret next) (zero , t) = inj₁ ret
-- δ-nat (guess hyp ret next) (suc r , t) = inj₂ ((r , proj₂ (proj₁ (δ-prop t hyp))) , next (proj₁ (proj₁ (δ-prop t hyp))))

δ-is-nat : TeacherIsNatural δ-nat
δ-is-nat = mkTeacherδ-IsNatural δ-f

module Properties {ℓr : Level} (R : Set ℓr)  {ℓc : Level} (C : Set ℓc) (learner : Learnerδ R C) where
  open Game NatLearning-Semantics δ-nat learner
  
  game-lower-bound : ∀ r (t : AT r) c →
                       ∃[ sec ]
                         (AT.interval t) ⊧ sec
                         × still-possible ((r , t) , c) r sec 
  game-lower-bound ℕ.zero (adv-teacher [ low ,+ len ] _) c =
    low , (mk⊧ 0 len (+-identityʳ _) refl) , tt
  game-lower-bound (ℕ.suc r) t c with (learner c)
  ... | guess hyp ret next with (δ-prop t hyp)
  ... | (wn , t') , ⟦t'⟧⊆⟦wn⟧ , t'⊆t =
    let
      sec , t'⊧sec , still-poss = game-lower-bound r t' (next wn)
    in
    sec , ⊆iv-preserve-⊧ t'⊆t t'⊧sec , ⟦t'⟧⊆⟦wn⟧ {sec} (_⊧_.⟦iv⟧ t'⊧sec) , still-poss
    where
      query : F R ((ℕ → Set) × C)
      query = ⟦F learner c ⟧

adversarial-teacher : Interval → NatTeacher L.0ℓ
adversarial-teacher iv = record
  { T = ΣAT
  ; st₀ = ⌊log₂ iv.len ⌋ , adv-teacher iv (2^⌊log₂n⌋≤1+n iv.len)
  ; δ = δ-f
  }
  where
    module iv = Interval iv

-- Main Theorem: Lower bound for Nat learning:
-- For every interval of length m, we can construct a teacher,
-- such that no client can win the learning game
-- within ⌊log₂ m ⌋ rounds.
-- Concretely, we phrase this as: for every client, all guesses
-- within ⌊log₂ m ⌋ were wrong and there is still some
-- number (in the interval) that is consistent with all
-- the answers by the teacher
thm-adversarial-teacher : ∀ {ℓc : Level} interval →
   ∃[ teacher ] ∀ (learner : Learner ℓc)
    → ∃[ sec ] interval ⊧ sec
      × learner can't-learn sec from teacher within ⌊log₂ (Interval.len interval) ⌋ rounds

thm-adversarial-teacher iv =
  record { T = ΣAT ; st₀ = st₀ ; δ = δ-f }
  , λ learner →
    let
        module learner = Learner learner
        open Properties learner.R learner.C learner.δ
    in
    game-lower-bound ⌊log₂ iv.len ⌋ t learner.q₀
  where
    module iv = Interval iv
    t = adv-teacher iv (2^⌊log₂n⌋≤1+n iv.len)
    st₀ : ΣAT
    st₀ = ⌊log₂ iv.len ⌋ , t

-- Steve Ballmer describes the following job interview question:
-- "I'm thinking of a number between 1 and 100. You can guess. After each guess, I
-- will tell you wether you're 'high' or 'low'."
-- see: https://youtu.be/svCYbkS0Sjk
-- In above video, it takes 7 question for the interviewer to guess the number correct.
-- And indeed, we can show, that the teacher can enforce it that the learner is not possible
-- to guess the number correctly within 6 guesses:
example-adversarial-100 : ∀ {ℓc : Level} →
   ∃[ teacher ] ∀ (learner : Learner ℓc)
    → ∃[ sec ] [ 1 ,+ 99 ] ⊧ sec
      × learner can't-learn sec from teacher within 6 rounds

example-adversarial-100 = thm-adversarial-teacher [ 1 ,+ 99 ]