Tools to build bandit algorithms #
noncomputable def
Bandits.pullCount'
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : ↥(Finset.Iic n) → α × ℝ)
(a : α)
 :
Number of pulls of arm a up to (and including) time n.
Equations
- Bandits.pullCount' n h a = {s : ↥(Finset.Iic n) | (h s).1 = a}.card
Instances For
noncomputable def
Bandits.sumRewards'
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : ↥(Finset.Iic n) → α × ℝ)
(a : α)
 :
Sum of rewards of arm a up to (and including) time n.
Equations
- Bandits.sumRewards' n h a = ∑ s : ↥(Finset.Iic n), if (h s).1 = a then (h s).2 else 0
Instances For
noncomputable def
Bandits.empMean'
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : ↥(Finset.Iic n) → α × ℝ)
(a : α)
 :
Empirical mean of arm a at time n.
Equations
- Bandits.empMean' n h a = Bandits.sumRewards' n h a / ↑(Bandits.pullCount' n h a)
Instances For
theorem
Bandits.pullCount'_eq_sum
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : ↥(Finset.Iic n) → α × ℝ)
(a : α)
 :
theorem
Bandits.measurable_pullCount'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
 :
Measurable fun (h : ↥(Finset.Iic n) → α × ℝ) => pullCount' n h a
theorem
Bandits.measurable_sumRewards'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
 :
Measurable fun (h : ↥(Finset.Iic n) → α × ℝ) => sumRewards' n h a
theorem
Bandits.measurable_empMean'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
 :
Measurable fun (h : ↥(Finset.Iic n) → α × ℝ) => empMean' n h a