The Explore-Then-Commit Algorithm #
theorem
ae_eq_set_iff
{α : Type u_1}
{mα : MeasurableSpace α}
{μ : MeasureTheory.Measure α}
{s t : Set α}
 :
theorem
measurable_sum_of_le
{α : Type u_1}
{mα : MeasurableSpace α}
{f : ℕ → α → ℝ}
{g : α → ℕ}
{n : ℕ}
(hg_le : ∀ (a : α), g a ≤ n)
(hf : ∀ (i : ℕ), Measurable (f i))
(hg : Measurable g)
 :
Measurable fun (a : α) => ∑ i ∈ Finset.Icc 1 (g a), f i a
noncomputable def
Bandits.ETC.nextArm
{K : ℕ}
(hK : 0 < K)
(m n : ℕ)
(h : ↥(Finset.Iic n) → Fin K × ℝ)
 :
Fin K
Arm pulled by the ETC algorithm at time n + 1.
Equations
Instances For
The Explore-Then-Commit algorithm.
Equations
- Bandits.etcAlgorithm hK m = Learning.detAlgorithm (Bandits.ETC.nextArm hK m) ⋯ ⟨0, hK⟩
Instances For
theorem
Bandits.ETC.arm_zero
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
 :
theorem
Bandits.ETC.arm_ae_eq_etcNextArm
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
 :
arm (n + 1) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (h : ℕ → Fin K × ℝ) =>
  nextArm hK m n fun (i : ↥(Finset.Iic n)) => h ↑i
theorem
Bandits.ETC.arm_of_lt
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hn : n < K * m)
 :
theorem
Bandits.ETC.arm_mul
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hm : m ≠ 0)
 :
have this := ⋯;
arm (K * m) =ᵐ[Bandit.trajMeasure (etcAlgorithm hK m) ν] fun (h : ℕ → Fin K × ℝ) =>
  measurableArgmax (empMean' (K * m - 1)) fun (i : ↥(Finset.Iic (K * m - 1))) => h ↑i
theorem
Bandits.ETC.arm_add_one_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hm : m ≠ 0)
(hn : K * m ≤ n)
 :
theorem
Bandits.ETC.arm_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hm : m ≠ 0)
(hn : K * m ≤ n)
 :
theorem
Bandits.ETC.pullCount_mul
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
 :
theorem
Bandits.ETC.pullCount_add_one_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
 :
theorem
Bandits.ETC.pullCount_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
 :
theorem
Bandits.ETC.sumRewards_bestArm_le_of_arm_mul_eq
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
 :
have this := ⋯;
∀ᵐ (h : ℕ → Fin K × ℝ) ∂Bandit.trajMeasure (etcAlgorithm hK m) ν,   arm (K * m) h = a → sumRewards (bestArm ν) (K * m) h ≤ sumRewards a (K * m) h
theorem
Bandits.ETC.identDistrib_aux
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(m✝ : ℕ)
(a b : Fin K)
 :
ProbabilityTheory.IdentDistrib
  (fun (ω : (ℕ → Fin K × ℝ) × (ℕ → Fin K → ℝ)) =>
    (∑ s ∈ Finset.Icc 1 m✝, rewardByCount a s ω.1 ω.2, ∑ s ∈ Finset.Icc 1 m✝, rewardByCount b s ω.1 ω.2))
  (fun (ω : (ℕ → Fin K × ℝ) × (ℕ → Fin K → ℝ)) => (∑ s ∈ Finset.range m✝, ω.2 s a, ∑ s ∈ Finset.range m✝, ω.2 s b))
  (Bandit.measure (etcAlgorithm hK m) ν) (Bandit.measure (etcAlgorithm hK m) ν)
theorem
Bandits.ETC.prob_arm_mul_eq_le
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(a : Fin K)
(hm : m ≠ 0)
 :
theorem
Bandits.ETC.expectation_pullCount_le
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
 :
theorem
Bandits.ETC.integrable_pullCount
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(n : ℕ)
 :
MeasureTheory.Integrable (fun (ω : ℕ → Fin K × ℝ) => ↑(pullCount a n ω)) (Bandit.trajMeasure (etcAlgorithm hK m) ν)