Regret

Definitions of regret, gaps, pull counts
noncomputable def Bandits.regret {α : Type u_1} {mα : MeasurableSpace α}
    (ν : ProbabilityTheory.Kernel α ℝ) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Regret of a sequence of pulls at time t for the reward kernel ν : Kernel α ℝ; the pulls and rewards are recorded in the history h : ℕ → α × ℝ.
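The defining equation is collapsed in the generated page. Since regret_eq_sum_pullCount_mul_gap below holds pointwise in the history h, the definition is presumably the pseudo-regret of the pulled arms. Writing μ_b for the mean of ν b and A_s for arm s h, a reconstruction (not the displayed Lean term) reads:

  regret ν t h = t * (⨆ b, μ_b) - ∑_{s < t} μ_{A_s}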
noncomputable def Bandits.gap {α : Type u_1} {mα : MeasurableSpace α}
    (ν : ProbabilityTheory.Kernel α ℝ) (a : α) : ℝ

Gap of an arm a: the difference between the highest mean of the arms and the mean of a.
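In the same notation, the docstring pins the definition down to

  gap ν a = (⨆ b, μ_b) - μ_a,

the per-pull price of playing a instead of an optimal arm (the exact Lean notation for the mean of ν a is assumed here).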
theorem Bandits.gap_nonneg {α : Type u_1} {mα : MeasurableSpace α}
    {ν : ProbabilityTheory.Kernel α ℝ} {a : α} [Fintype α] :
    0 ≤ gap ν a

The gap is nonnegative: over finitely many arms, the highest mean is an upper bound for the mean of every individual arm.
noncomputable def Bandits.pullCount {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℕ

Number of times arm a was pulled up to time t (excluding time t).

Equations
- Bandits.pullCount a t h = {s ∈ Finset.range t | Bandits.arm s h = a}.card
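A minimal sanity check, using only the equation above together with standard Finset simp lemmas (a sketch; it assumes the equation is available to simp as an unfolding of pullCount):

  example {α : Type*} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
      Bandits.pullCount a 0 h = 0 := by
    simp [Bandits.pullCount]

No step precedes time 0, so the filtered set is empty and the count is 0.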
@[simp]
theorem Bandits.stepsUntil_zero_of_ne {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h ≠ a) :
theorem Bandits.stepsUntil_zero_of_eq {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
theorem Bandits.stepsUntil_pullCount_le {α : Type u_1} [DecidableEq α]
    (h : ℕ → α × ℝ) (a : α) (t : ℕ) :
theorem Bandits.stepsUntil_one_of_eq {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
    stepsUntil a 1 h = 0

If we pull arm a at time 0, then the first time at which it has been pulled once is 0.
theorem Bandits.arm_eq_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α]
    {m n : ℕ} {a : α} {ω : ℕ → α × ℝ} (hm : m ≠ 0) (h : stepsUntil a m ω = ↑n) :
    arm n ω = a
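Read together with rewardByCount below, these lemmas indicate that stepsUntil a m ω (defined elsewhere) is the first time at which arm a has been pulled m times, valued in ℕ∞, with ⊤ when no such time exists. For instance, if a is pulled exactly at times 0 and 2, then stepsUntil a 1 ω = 0 and stepsUntil a 2 ω = 2.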
noncomputable def Bandits.sumRewards {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Sum of rewards obtained when pulling arm a up to time t (exclusive).

Equations
- Bandits.sumRewards a t h = ∑ s ∈ Finset.range t, if Bandits.arm s h = a then Bandits.reward s h else 0
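A degenerate-case check grounded in the equation above (a sketch): with a single arm, the if-condition always holds, so every reward is collected.

  example (t : ℕ) (h : ℕ → Unit × ℝ) :
      Bandits.sumRewards () t h = ∑ s ∈ Finset.range t, Bandits.reward s h := by
    simp [Bandits.sumRewards]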
noncomputable def Bandits.empMean {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ

Empirical mean reward obtained when pulling arm a up to time t (exclusive).

Equations
- Bandits.empMean a t h = Bandits.sumRewards a t h / ↑(Bandits.pullCount a t h)
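One Lean convention is worth flagging: real division by zero is zero, so the empirical mean of a never-pulled arm is 0 rather than undefined. A quick check (sketch, unfolding the equations above):

  example {α : Type*} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
      Bandits.empMean a 0 h = 0 := by
    simp [Bandits.empMean, Bandits.sumRewards]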
theorem Bandits.sumRewards_eq_pullCount_mul_empMean {α : Type u_1} [DecidableEq α]
    {h : ℕ → α × ℝ} {t : ℕ} {a : α} (h_pull : pullCount a t h ≠ 0) :
    sumRewards a t h = ↑(pullCount a t h) * empMean a t h
noncomputable def Bandits.rewardByCount {α : Type u_1} [DecidableEq α]
    (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) : ℝ

Reward obtained when pulling arm a for the m-th time. If arm a is never pulled an m-th time, the value is taken from the auxiliary sequence z instead.

Equations
- Bandits.rewardByCount a m h z = match Bandits.stepsUntil a m h with | none => z m a | some n => Bandits.reward n h
theorem Bandits.rewardByCount_eq_ite {α : Type u_1} [DecidableEq α]
    (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
theorem Bandits.rewardByCount_of_stepsUntil_eq_top {α : Type u_1} [DecidableEq α]
    {m : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ⊤) :
    rewardByCount a m ω.1 ω.2 = ω.2 m a
theorem Bandits.rewardByCount_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α]
    {m n : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ↑n) :
    rewardByCount a m ω.1 ω.2 = reward n ω.1
theorem Bandits.sum_rewardByCount_eq_sumRewards {α : Type u_1} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
theorem Bandits.sum_pullCount_mul {α : Type u_1} [DecidableEq α] [Fintype α]
    (h : ℕ → α × ℝ) (f : α → ℝ) (t : ℕ) :
    ∑ a, ↑(pullCount a t h) * f a = ∑ s ∈ Finset.range t, f (arm s h)
theorem Bandits.regret_eq_sum_pullCount_mul_gap {α : Type u_1} [DecidableEq α]
    {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} {h : ℕ → α × ℝ}
    {t : ℕ} [Fintype α] :
    regret ν t h = ∑ a, ↑(pullCount a t h) * gap ν a
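This is the classical regret decomposition R_t = ∑_a N_a(t) · Δ_a: to bound the regret of an algorithm it suffices to bound the pull counts of the suboptimal arms, since the best arm's gap vanishes (gap_bestArm below). For a concrete instance: with two arms of gaps Δ₁ = 0 and Δ₂ = 0.5, a run of t = 100 steps that pulls arm 2 thirty times has regret 30 · 0.5 = 15, regardless of the realized rewards.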
noncomputable def Bandits.bestArm {α : Type u_1} {mα : MeasurableSpace α}
    [Fintype α] [Nonempty α] (ν : ProbabilityTheory.Kernel α ℝ) : α

Arm with the highest mean.

Equations
- Bandits.bestArm ν = ⋯.choose
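The ⋯.choose in the equation selects a witness of the finite, nonempty maximization, so the characterizing property is presumably that bestArm ν attains the highest mean, i.e. μ_a ≤ μ_{bestArm ν} for every a (notation as in the sketches above). The simp lemma gap_bestArm below restates this as the vanishing of its gap.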
@[simp]
theorem Bandits.gap_bestArm {α : Type u_1} {mα : MeasurableSpace α}
    {ν : ProbabilityTheory.Kernel α ℝ} [Fintype α] [Nonempty α] :
    gap ν (bestArm ν) = 0

The gap of the best arm is zero.