UCB algorithm
noncomputable def
Bandits.ucbWidth'
{K : ℕ}
(c : ℝ)
(n : ℕ)
(h : ↥(Finset.Iic n) → Fin K × ℝ)
(a : Fin K)
:
The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.
Equations
- Bandits.ucbWidth' c n h a = √(c * Real.log (↑n + 2) / ↑(Learning.pullCount' n h a))
Instances For
theorem
Bandits.UCB.measurable_nextArm
{K : ℕ}
(hK : 0 < K)
(c : ℝ)
(n : ℕ)
:
Measurable (nextArm hK c n)
The UCB algorithm.
Equations
- Bandits.ucbAlgorithm hK c = Learning.detAlgorithm (Bandits.UCB.nextArm hK c) ⋯ ⟨0, hK⟩
Instances For
noncomputable def
Bandits.UCB.ucbWidth
{K : ℕ}
{Ω : Type u_1}
(A : ℕ → Ω → Fin K)
(c : ℝ)
(a : Fin K)
(n : ℕ)
(ω : Ω)
:
The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.
Equations
- Bandits.UCB.ucbWidth A c a n ω = √(c * Real.log (↑n + 1) / ↑(Learning.pullCount A a n ω))
Instances For
theorem
Bandits.UCB.measurable_ucbWidth
{K : ℕ}
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{A : ℕ → Ω → Fin K}
{n : ℕ}
(hA : ∀ (n : ℕ), Measurable (A n))
(c : ℝ)
(a : Fin K)
:
Measurable (ucbWidth A c a n)
theorem
Bandits.UCB.arm_zero
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.arm_ae_eq_ucbNextArm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(n : ℕ)
:
theorem
Bandits.UCB.arm_ae_all_eq
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.ucbIndex_le_ucbIndex_arm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
(hn : K ≤ n)
:
∀ᵐ (ω : Ω) ∂P, Learning.empMean A R a n ω + ucbWidth A c a n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω
theorem
Bandits.UCB.forall_arm_eq_mod_of_lt
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
theorem
Bandits.UCB.forall_arm_prop
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.pullCount_eq_of_time_eq
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a K ω = 1
theorem
Bandits.UCB.time_gt_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
theorem
Bandits.UCB.pullCount_pos_of_time_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
:
theorem
Bandits.UCB.pullCount_pos_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
:
∀ᵐ (ω : Ω) ∂P, ∀ (n : ℕ), 1 < Learning.pullCount A a n ω → ∀ (b : Fin K), 0 < Learning.pullCount A b n ω
theorem
Bandits.UCB.gap_arm_le_two_mul_ucbWidth
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{ω : Ω}
[Nonempty (Fin K)]
(h_best : ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
(h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ ∫ (x : ℝ), id x ∂ν (A n ω))
(h_le :
Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
:
theorem
Bandits.UCB.pullCount_arm_le
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{ω : Ω}
[Nonempty (Fin K)]
(hc : 0 ≤ c)
(h_best : ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
(h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ ∫ (x : ℝ), id x ∂ν (A n ω))
(h_le :
Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤ Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
(h_gap_pos : 0 < gap ν (A n ω))
(h_pull_pos : 0 < Learning.pullCount A (A n ω) n ω)
:
theorem
Bandits.UCB.todo
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n k : ℕ)
(hk : k ≠ 0)
:
theorem
Bandits.UCB.todo'
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n k : ℕ)
(hk : k ≠ 0)
:
theorem
Bandits.UCB.prob_ucbIndex_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.prob_ucbIndex_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.probReal_ucbIndex_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.probReal_ucbIndex_ge
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 ≤ c)
(a : Fin K)
(n : ℕ)
:
theorem
Bandits.UCB.pullCount_le_add_three
{K : ℕ}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
{Ω : Type u_1}
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(a : Fin K)
(n C : ℕ)
(ω : Ω)
:
Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | C < Learning.pullCount A a s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
theorem
Bandits.UCB.pullCount_le_add_three_ae
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(a : Fin K)
(n C : ℕ)
(hC : C ≠ 0)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A (bestArm ν) s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
theorem
Bandits.UCB.some_sum_eq_zero
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hc : 0 ≤ c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n C : ℕ)
(hC : C ≠ 0)
(hC' : 4 * c * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C)
:
∀ᵐ (ω : Ω) ∂P, ∑ s ∈ Finset.range n,
{s : ℕ | A s ω = a ∧ C < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧ Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ ∫ (x : ℝ), id x ∂ν (A s ω)}.indicator
1 s = 0
theorem
Bandits.UCB.pullCount_ae_le_add_two
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hc : 0 ≤ c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n C : ℕ)
(hC : C ≠ 0)
(hC' : 4 * c * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C)
:
∀ᵐ (ω : Ω) ∂P, Learning.pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A (bestArm ν) s ω ∧ Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < ∫ (x : ℝ), id x ∂ν (bestArm ν)}.indicator
1 s + ∑ s ∈ Finset.range n,
{s : ℕ | 0 < Learning.pullCount A a s ω ∧ ∫ (x : ℝ), id x ∂ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω}.indicator
1 s
A sum that appears in the UCB regret upper bound.
Equations
- Bandits.UCB.constSum c n = ∑ s ∈ Finset.range n, 1 / (↑s + 1) ^ (c / 2 - 1)
Instances For
theorem
Bandits.UCB.expectation_pullCount_le'
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n : ℕ)
:
Bound on the expectation of the number of pulls of each arm by the UCB algorithm.
theorem
Bandits.UCB.expectation_pullCount_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(a : Fin K)
(h_gap : 0 < gap ν a)
(n : ℕ)
:
Bound on the expectation of the number of pulls of each arm by the UCB algorithm.
theorem
Bandits.UCB.regret_le
{K : ℕ}
{hK : 0 < K}
{c : ℝ}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P)
(hν : ∀ (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(hc : 0 < c)
(n : ℕ)
:
Regret bound for the UCB algorithm.