Regret

Definitions of regret, gaps, pull counts
noncomputable def Bandits.regret {α : Type u_1} {mα : MeasurableSpace α}
    (ν : ProbabilityTheory.Kernel α ℝ) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Regret of a sequence of pulls at time t for the reward kernel ν : Kernel α ℝ; the pulls and rewards are recorded in the history h : ℕ → α × ℝ.
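The defining equation is collapsed in the generated page. Since regret_eq_sum_pullCount_mul_gap below holds pointwise in the history h, the definition is presumably the pseudo-regret of the pulled arms. Writing μ_b for the mean of ν b and A_s for arm s h, a reconstruction (not the displayed Lean term) reads:

  regret ν t h = t * (⨆ b, μ_b) - ∑_{s < t} μ_{A_s}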
noncomputable def Bandits.gap {α : Type u_1} {mα : MeasurableSpace α}
    (ν : ProbabilityTheory.Kernel α ℝ) (a : α) : ℝ

Gap of an arm a: the difference between the highest mean of the arms and the mean of a.
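In the same notation, the docstring pins the definition down to

  gap ν a = (⨆ b, μ_b) - μ_a,

the per-pull price of playing a instead of an optimal arm (the exact Lean notation for the mean of ν a is assumed here).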
theorem Bandits.gap_nonneg {α : Type u_1} {mα : MeasurableSpace α}
    {ν : ProbabilityTheory.Kernel α ℝ} {a : α} [Fintype α] :
    0 ≤ gap ν a

The gap is nonnegative: over finitely many arms, the highest mean is an upper bound for the mean of every individual arm.
noncomputable def Bandits.pullCount {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℕ

Number of times arm a was pulled up to time t (excluding time t).

Equations
- Bandits.pullCount a t h = {s ∈ Finset.range t | Bandits.arm s h = a}.card
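A minimal sanity check, using only the equation above together with standard Finset simp lemmas (a sketch; it assumes the equation is available to simp as an unfolding of pullCount):

  example {α : Type*} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
      Bandits.pullCount a 0 h = 0 := by
    simp [Bandits.pullCount]

No step precedes time 0, so the filtered set is empty and the count is 0.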
@[simp]
theorem Bandits.stepsUntil_zero_of_ne {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h ≠ a) :
theorem Bandits.stepsUntil_zero_of_eq {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
theorem Bandits.stepsUntil_pullCount_le {α : Type u_1} [DecidableEq α]
    (h : ℕ → α × ℝ) (a : α) (t : ℕ) :
theorem Bandits.stepsUntil_one_of_eq {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
    stepsUntil a 1 h = 0

If we pull arm a at time 0, then the first time at which it has been pulled once is 0.
theorem Bandits.arm_eq_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α]
    {m n : ℕ} {a : α} {ω : ℕ → α × ℝ} (hm : m ≠ 0) (h : stepsUntil a m ω = ↑n) :
    arm n ω = a
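Read together with rewardByCount below, these lemmas indicate that stepsUntil a m ω (defined elsewhere) is the first time at which arm a has been pulled m times, valued in ℕ∞, with ⊤ when no such time exists. For instance, if a is pulled exactly at times 0 and 2, then stepsUntil a 1 ω = 0 and stepsUntil a 2 ω = 2.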
noncomputable def Bandits.sumRewards {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Sum of rewards obtained when pulling arm a up to time t (exclusive).

Equations
- Bandits.sumRewards a t h = ∑ s ∈ Finset.range t, if Bandits.arm s h = a then Bandits.reward s h else 0
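A degenerate-case check grounded in the equation above (a sketch): with a single arm, the if-condition always holds, so every reward is collected.

  example (t : ℕ) (h : ℕ → Unit × ℝ) :
      Bandits.sumRewards () t h = ∑ s ∈ Finset.range t, Bandits.reward s h := by
    simp [Bandits.sumRewards]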
noncomputable def Bandits.empMean {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Empirical mean reward obtained when pulling arm a up to time t (exclusive).

Equations
- Bandits.empMean a t h = Bandits.sumRewards a t h / ↑(Bandits.pullCount a t h)
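One Lean convention is worth flagging: real division by zero is zero, so the empirical mean of a never-pulled arm is 0 rather than undefined. A quick check (sketch, unfolding the equations above):

  example {α : Type*} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
      Bandits.empMean a 0 h = 0 := by
    simp [Bandits.empMean, Bandits.sumRewards]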
theorem Bandits.sumRewards_eq_pullCount_mul_empMean {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {t : ℕ} {a : α} (h_pull : pullCount a t h ≠ 0) :
    sumRewards a t h = ↑(pullCount a t h) * empMean a t h
noncomputable def Bandits.rewardByCount {α : Type u_1} [DecidableEq α]
    (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) : ℝ

Reward obtained when pulling arm a for the m-th time. If arm a is never pulled an m-th time, the value is taken from the auxiliary sequence z instead.

Equations
- Bandits.rewardByCount a m h z = match Bandits.stepsUntil a m h with | none => z m a | some n => Bandits.reward n h
theorem Bandits.rewardByCount_eq_ite {α : Type u_1} [DecidableEq α]
    (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
theorem Bandits.rewardByCount_of_stepsUntil_eq_top {α : Type u_1} [DecidableEq α]
    {m : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ⊤) :
    rewardByCount a m ω.1 ω.2 = ω.2 m a
theorem Bandits.rewardByCount_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α]
    {m n : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ↑n) :
    rewardByCount a m ω.1 ω.2 = reward n ω.1
theorem Bandits.sum_rewardByCount_eq_sumRewards {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
theorem Bandits.sum_pullCount_mul {α : Type u_1} [DecidableEq α] [Fintype α]
    (h : ℕ → α × ℝ) (f : α → ℝ) (t : ℕ) :
    ∑ a, ↑(pullCount a t h) * f a = ∑ s ∈ Finset.range t, f (arm s h)
theorem Bandits.regret_eq_sum_pullCount_mul_gap {α : Type u_1} [DecidableEq α]
    {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} {h : ℕ → α × ℝ}
    {t : ℕ} [Fintype α] :
    regret ν t h = ∑ a, ↑(pullCount a t h) * gap ν a
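This is the classical regret decomposition R_t = ∑_a N_a(t) · Δ_a: to bound the regret of an algorithm it suffices to bound the pull counts of the suboptimal arms, since the best arm's gap vanishes (gap_bestArm below). For a concrete instance: with two arms of gaps Δ₁ = 0 and Δ₂ = 0.5, a run of t = 100 steps that pulls arm 2 thirty times has regret 30 · 0.5 = 15, regardless of the realized rewards.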
noncomputable def Bandits.bestArm {α : Type u_1} {mα : MeasurableSpace α}
    [Fintype α] [Nonempty α] (ν : ProbabilityTheory.Kernel α ℝ) : α

Arm with the highest mean.

Equations
- Bandits.bestArm ν = ⋯.choose
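The ⋯.choose in the equation selects a witness of the finite, nonempty maximization, so the characterizing property is presumably that bestArm ν attains the highest mean, i.e. μ_a ≤ μ_{bestArm ν} for every a (notation as in the sketches above). The simp lemma gap_bestArm below restates this as the vanishing of its gap.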
@[simp]
theorem Bandits.gap_bestArm {α : Type u_1} {mα : MeasurableSpace α}
    {ν : ProbabilityTheory.Kernel α ℝ} [Fintype α] [Nonempty α] :
    gap ν (bestArm ν) = 0

The gap of the best arm is zero.