Round-Robin algorithm #
This algorithm pulls the arms in a round-robin fashion: it cycles through the K arms in order, one pull per round.
The Round-Robin algorithm: a deterministic algorithm that chooses the next arm according to RoundRobin.nextArm.
Equations
- Bandits.roundRobinAlgorithm hK = Learning.detAlgorithm (fun (n : ℕ) (x : ↥(Finset.Iic n) → Fin K × ℝ) => Bandits.RoundRobin.nextArm hK n) ⋯ ⟨0, hK⟩
Instances For
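As a rough illustration of the selection rule, round-robin selection amounts to reducing the round index modulo the number of arms. The sketch below is illustrative only; nextArmSketch is a hypothetical name and is not the library's RoundRobin.nextArm.

import Mathlib

/-- Illustrative round-robin arm choice: pull arm `n % K` at round `n`.
A sketch of the intended behaviour, not the library definition. -/
def nextArmSketch {K : ℕ} (hK : 0 < K) (n : ℕ) : Fin K :=
  ⟨n % K, Nat.mod_lt n hK⟩

-- With `K = 3`, the first seven rounds pull arms 0, 1, 2, 0, 1, 2, 0.
#eval (List.range 7).map fun n => nextArmSketch (K := 3) (by decide) n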
theorem
Bandits.RoundRobin.arm_zero
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P 0)
:
theorem
Bandits.RoundRobin.arm_ae_eq_roundRobinNextArm
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(n : ℕ)
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (n + 1))
:
theorem
Bandits.RoundRobin.arm_ae_eq
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(n : ℕ)
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P n)
:
The arm pulled at time n is the arm n % K.
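In symbols, the docstring's claim is that, P-almost surely, A n ω equals n % K viewed as an element of Fin K (a hedged reading, since the displayed conclusion is omitted above). For example, with K = 3 the arms pulled at times 0, 1, 2, 3, 4, 5 are 0, 1, 2, 0, 1, 2.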
theorem
Bandits.RoundRobin.pullCount_mul
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(m : ℕ)
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K * m - 1))
(a : Fin K)
:
At time K * m, the number of pulls of each arm is equal to m.
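In symbols, the docstring's claim is that, P-almost surely, Learning.pullCount A a (K * m) ω = m for every arm a (a hedged reading, since the displayed conclusion is omitted above). Worked example: with K = 3 and m = 2, the first 6 pulls are 0, 1, 2, 0, 1, 2, so each arm has pullCount 2 at time 6.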
theorem
Bandits.RoundRobin.pullCount_eq_one
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1))
(a : Fin K)
:
theorem
Bandits.RoundRobin.time_gt_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1))
(a : Fin K)
:
theorem
Bandits.RoundRobin.pullCount_pos_of_time_ge
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1))
:
theorem
Bandits.RoundRobin.pullCount_pos_of_pullCount_gt_one
{K : ℕ}
{hK : 0 < K}
{ν : ProbabilityTheory.Kernel (Fin K) ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{Ω : Type u_1}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → Fin K}
{R : ℕ → Ω → ℝ}
[Nonempty (Fin K)]
(h : Learning.IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1))
(a : Fin K)
:
∀ᵐ (ω : Ω) ∂P, ∀ (n : ℕ), 1 < Learning.pullCount A a n ω → ∀ (b : Fin K), 0 < Learning.pullCount A b n ω
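In words: P-almost surely, at any time n, if some arm a has already been pulled more than once then every arm b has been pulled at least once, reflecting the fact that round-robin completes a full pass over the arms before revisiting any of them.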