Documentation

LeanBandits.ETC

The Explore-Then-Commit Algorithm #

theorem ae_eq_set_iff {α : Type u_1} { : MeasurableSpace α} {μ : MeasureTheory.Measure α} {s t : Set α} :
s =ᵐ[μ] t ∀ᵐ (a : α) μ, a s a t
theorem measurable_sum_of_le {α : Type u_1} { : MeasurableSpace α} {f : α} {g : α} {n : } (hg_le : ∀ (a : α), g a n) (hf : ∀ (i : ), Measurable (f i)) (hg : Measurable g) :
Measurable fun (a : α) => iFinset.Icc 1 (g a), f i a
noncomputable def Bandits.ETC.nextArm {K : } (hK : 0 < K) (m n : ) (h : { x : // x Finset.Iic n }Fin K × ) :
Fin K

Arm pulled by the ETC algorithm at time n + 1.

Equations
Instances For
    theorem Bandits.ETC.measurable_nextArm {K : } (hK : 0 < K) (m n : ) :
    noncomputable def Bandits.etcAlgorithm {K : } (hK : 0 < K) (m : ) :

    The Explore-Then-Commit algorithm.

    Equations
    Instances For
      theorem Bandits.ETC.arm_ae_eq_etcNextArm {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (n : ) :
      arm (n + 1) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (h : Fin K × ) => nextArm hK m n fun (i : { x : // x Finset.Iic n }) => h i
      theorem Bandits.ETC.arm_of_lt {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {n : } (hn : n < K * m) :
      arm n =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (x : Fin K × ) => n % K,
      theorem Bandits.ETC.arm_mul {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (hm : m 0) :
      have this := ; arm (K * m) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (h : Fin K × ) => measurableArgmax (empMean' (K * m - 1)) fun (i : { x : // x Finset.Iic (K * m - 1) }) => h i
      theorem Bandits.ETC.arm_add_one_of_ge {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {n : } (hm : m 0) (hn : K * m n) :
      arm (n + 1) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (ω : Fin K × ) => arm n ω
      theorem Bandits.ETC.arm_of_ge {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {n : } (hm : m 0) (hn : K * m n) :
      theorem Bandits.ETC.sum_mod_range_mul {K : } (hK : 0 < K) (m : ) (a : Fin K) :
      (∑ sFinset.range (K * m), if s % K, = a then 1 else 0) = m
      theorem Bandits.ETC.pullCount_mul {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (a : Fin K) :
      pullCount a (K * m) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (x : Fin K × ) => m
      theorem Bandits.ETC.pullCount_add_one_of_ge {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (a : Fin K) (hm : m 0) {n : } (hn : K * m n) :
      pullCount a (n + 1) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (ω : Fin K × ) => pullCount a n ω + {ω' : Fin K × | arm (K * m) ω' = a}.indicator (fun (x : Fin K × ) => 1) ω
      theorem Bandits.ETC.pullCount_of_ge {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (a : Fin K) (hm : m 0) {n : } (hn : K * m n) :
      pullCount a n =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (ω : Fin K × ) => m + (n - K * m) * {ω' : Fin K × | arm (K * m) ω' = a}.indicator (fun (x : Fin K × ) => 1) ω
      theorem Bandits.ETC.sumRewards_bestArm_le_of_arm_mul_eq {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (a : Fin K) (hm : m 0) :
      have this := ; ∀ᵐ (h : Fin K × ) Bandit.trajMeasure (etcAlgorithm hK m) ν, arm (K * m) h = asumRewards (bestArm ν) (K * m) h sumRewards a (K * m) h
      theorem Bandits.ETC.identDistrib_aux {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (m✝ : ) (a b : Fin K) :
      ProbabilityTheory.IdentDistrib (fun (ω : (Fin K × ) × (Fin K)) => (sFinset.Icc 1 m✝, rewardByCount a s ω.1 ω.2, sFinset.Icc 1 m✝, rewardByCount b s ω.1 ω.2)) (fun (ω : (Fin K × ) × (Fin K)) => (sFinset.range m✝, ω.2 s a, sFinset.range m✝, ω.2 s b)) (Bandit.measure (etcAlgorithm hK m) ν) (Bandit.measure (etcAlgorithm hK m) ν)
      theorem Bandits.ETC.prob_arm_mul_eq_le {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] ( : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ) => x - (x : ), id x ν a) 1 (ν a)) (a : Fin K) (hm : m 0) :
      (Bandit.trajMeasure (etcAlgorithm hK m) ν).real {ω : Fin K × | arm (K * m) ω = a} Real.exp (-m * gap ν a ^ 2 / 4)
      theorem Bandits.ETC.expectation_pullCount_le {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] ( : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ) => x - (x : ), id x ν a) 1 (ν a)) (a : Fin K) (hm : m 0) {n : } (hn : K * m n) :
      (x : Fin K × ), (fun (ω : Fin K × ) => (pullCount a n ω)) x Bandit.trajMeasure (etcAlgorithm hK m) ν m + (n - K * m) * Real.exp (-m * gap ν a ^ 2 / 4)
      theorem Bandits.ETC.integrable_pullCount {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] (a : Fin K) (n : ) :
      MeasureTheory.Integrable (fun (ω : Fin K × ) => (pullCount a n ω)) (Bandit.trajMeasure (etcAlgorithm hK m) ν)
      theorem Bandits.ETC.regret_le {K : } {hK : 0 < K} {m : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] ( : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ) => x - (x : ), id x ν a) 1 (ν a)) (hm : m 0) (n : ) (hn : K * m n) :
      (x : Fin K × ), regret ν n x Bandit.trajMeasure (etcAlgorithm hK m) ν a : Fin K, gap ν a * (m + (n - K * m) * Real.exp (-m * gap ν a ^ 2 / 4))