UCB algorithm
noncomputable def
Bandits.ucbWidth'
{K : ℕ}
(c : ℝ)
(n : ℕ)
(h : ↥(Finset.Iic n) → Fin K × ℝ)
(a : Fin K)
:
The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.
Equations
- Bandits.ucbWidth' c n h a = √(c * Real.log (↑n + 2) / ↑(Learning.pullCount' n h a))
Instances For
theorem
Bandits.UCB.measurable_nextArm
{K : ℕ}
(hK : 0 < K)
(c : ℝ)
(n : ℕ)
:
Measurable (nextArm hK c n)
The UCB algorithm.
Equations
- Bandits.ucbAlgorithm hK c = Learning.detAlgorithm (Bandits.UCB.nextArm hK c) ⋯ ⟨0, hK⟩
Instances For
noncomputable def
Bandits.UCB.ucbWidth
{K : ℕ}
{Ω : Type u_1}
(A : ℕ → Ω → Fin K)
(c : ℝ)
(a : Fin K)
(n : ℕ)
(ω : Ω)
:
The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.
Equations
- Bandits.UCB.ucbWidth A c a n ω = √(c * Real.log (↑n + 1) / ↑(Learning.pullCount A a n ω))
Instances For
theorem
Bandits.UCB.measurable_ucbWidth
{K : ℕ}
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{A : ℕ → Ω → Fin K}
{n : ℕ}
(hA : ∀ (n : ℕ), Measurable (A n))
(c : ℝ)
(a : Fin K)
:
Measurable (ucbWidth A c a n)
theorem
Bandits.UCB.arm_zero
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.arm_ae_eq_ucbNextArm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(n : ℕ)
:
theorem
Bandits.UCB.arm_ae_all_eq
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.ucbIndex_le_ucbIndex_arm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
(hn : K ≤ n)
:
∀ᵐ (ω : Ω) ∂P, Learning.empMean A R a n ω + ucbWidth A c a n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω
theorem
Bandits.UCB.forall_arm_eq_mod_of_lt
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
theorem
Bandits.UCB.forall_arm_prop
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.pullCount_eq_of_time_eq
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a K ω = 1
theorem
Bandits.UCB.time_gt_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
theorem
Bandits.UCB.pullCount_pos_of_time_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.pullCount_pos_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
∀ᵐ (ω : Ω) ∂P, ∀ (n : ℕ), 1 < Learning.pullCount A a n ω → ∀ (b : Fin K), 0 < Learning.pullCount A b n ω
theorem
Bandits.UCB.gap_arm_le_two_mul_ucbWidth
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{ω : Ω}
[Nonempty (Fin K)]
(h_best : ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
(h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ ∫ (x : ℝ), id x ∂ν (A n ω))
(h_le :
Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
:
theorem
Bandits.UCB.pullCount_arm_le
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{ω : Ω}
[Nonempty (Fin K)]
(hc : 0 ≤ c)
(h_best : ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
(h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ ∫ (x : ℝ), id x ∂ν (A n ω))
(h_le :
Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
(h_gap_pos : 0 < gap ν (A n ω))
(h_pull_pos : 0 < Learning.pullCount A (A n ω) n ω)
:
theorem
Bandits.UCB.todo
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n k : ℕ)
(hk : k ≠ 0)
:
theorem
Bandits.UCB.todo'
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n k : ℕ)
(hk : k ≠ 0)
:
theorem
Bandits.UCB.prob_ucbIndex_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.prob_ucbIndex_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.probReal_ucbIndex_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.probReal_ucbIndex_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.pullCount_le_add_three
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(a : Fin K)
(n C : ℕ)
(ω : Ω)
:
Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | C < Learning.pullCount A a s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
theorem
Bandits.UCB.pullCount_le_add_three_ae
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
(n C : ℕ)
(hC : C ≠ 0)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A (bestArm ν) s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
theorem
Bandits.UCB.some_sum_eq_zero
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hc : 0 ≤ c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n C : ℕ)
(hC : C ≠ 0)
(hC' : 4 * c * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C)
:
∀ᵐ (ω : Ω) ∂P, ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s = 0
theorem
Bandits.UCB.pullCount_ae_le_add_two
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hc : 0 ≤ c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n C : ℕ)
(hC : C ≠ 0)
(hC' : 4 * c * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A (bestArm ν) s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
A sum that appears in the UCB regret upper bound.
Equations
- Bandits.UCB.constSum c n = ∑ s ∈ Finset.range n, 1 / (↑s + 1) ^ (c / 2 - 1)
Instances For
theorem
Bandits.UCB.expectation_pullCount_le'
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n : ℕ)
:
Bound on the expectation of the number of pulls of each arm by the UCB algorithm.
theorem
Bandits.UCB.expectation_pullCount_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n : ℕ)
:
Bound on the expectation of the number of pulls of each arm by the UCB algorithm.
theorem
Bandits.UCB.regret_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(n : ℕ)
:
Regret bound for the UCB algorithm.