The Explore-Then-Commit Algorithm #
theorem
ae_eq_set_iff
{α : Type u_1}
{mα : MeasurableSpace α}
{μ : MeasureTheory.Measure α}
{s t : Set α}
:
theorem
measurable_sum_of_le
{α : Type u_1}
{mα : MeasurableSpace α}
{f : ℕ → α → ℝ}
{g : α → ℕ}
{n : ℕ}
(hg_le : ∀ (a : α), g a ≤ n)
(hf : ∀ (i : ℕ), Measurable (f i))
(hg : Measurable g)
:
Measurable fun (a : α) => ∑ i ∈ Finset.Icc 1 (g a), f i a
The Explore-Then-Commit algorithm.
Equations
- Bandits.etcAlgorithm hK m = Learning.detAlgorithm (Bandits.ETC.nextArm hK m) ⋯ ⟨0, hK⟩
Instances For
theorem
Bandits.ETC.arm_zero
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
:
theorem
Bandits.ETC.arm_ae_eq_etcNextArm
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
theorem
Bandits.ETC.arm_of_lt
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hn : n < K * m)
:
theorem
Bandits.ETC.arm_mul
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hm : m ≠ 0)
:
theorem
Bandits.ETC.arm_add_one_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hm : m ≠ 0)
(hn : K * m ≤ n)
:
theorem
Bandits.ETC.arm_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hm : m ≠ 0)
(hn : K * m ≤ n)
:
theorem
Bandits.ETC.pullCount_mul
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
:
theorem
Bandits.ETC.pullCount_add_one_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
:
theorem
Bandits.ETC.pullCount_of_ge
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
:
theorem
Bandits.ETC.sumRewards_bestArm_le_of_arm_mul_eq
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(hm : m ≠ 0)
:
have this := ⋯;
∀ᵐ (h : ℕ → Fin K × ℝ) ∂Bandit.trajMeasure (etcAlgorithm hK m) ν, arm (K * m) h = a → sumRewards (bestArm ν) (K * m) h ≤ sumRewards a (K * m) h
theorem
Bandits.ETC.identDistrib_aux
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(m✝ : ℕ)
(a b : Fin K)
:
ProbabilityTheory.IdentDistrib
(fun (ω : (ℕ → Fin K × ℝ) × (ℕ → Fin K → ℝ)) =>
(∑ s ∈ Finset.Icc 1 m✝, rewardByCount a s ω.1 ω.2, ∑ s ∈ Finset.Icc 1 m✝, rewardByCount b s ω.1 ω.2))
(fun (ω : (ℕ → Fin K × ℝ) × (ℕ → Fin K → ℝ)) => (∑ s ∈ Finset.range m✝, ω.2 s a, ∑ s ∈ Finset.range m✝, ω.2 s b))
(Bandit.measure (etcAlgorithm hK m) ν) (Bandit.measure (etcAlgorithm hK m) ν)
theorem
Bandits.ETC.prob_arm_mul_eq_le
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(a : Fin K)
(hm : m ≠ 0)
:
theorem
Bandits.ETC.expectation_pullCount_le
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(a : Fin K)
(hm : m ≠ 0)
{n : ℕ}
(hn : K * m ≤ n)
:
theorem
Bandits.ETC.integrable_pullCount
{K : ℕ}
{hK : 0 < K}
{m : ℕ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : Fin K)
(n : ℕ)
:
MeasureTheory.Integrable (fun (ω : ℕ → Fin K × ℝ) => ↑(pullCount a n ω)) (Bandit.trajMeasure (etcAlgorithm hK m) ν)