Bookkeeping definitions for finite action space sequential learning problems #
If the number of actions is finite, it makes sense to define the number of times each action was chosen, the time at which an action was chosen for the n-th time, the value of the reward at that time, the sum of rewards obtained for each action, the empirical mean reward for each action, etc.
For each definition that takes as arguments a time t : ℕ, a history h : ℕ → α × R, and possibly
other parameters, we put the time and the history last, in that order, so that the definition can
be seen as a stochastic process indexed by the time t on the measurable space ℕ → α × R.
Number of times action a was chosen up to time t (excluding t).
Equations
- Learning.pullCount A a t ω = {s ∈ Finset.range t | A s ω = a}.card
Instances For
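As an illustration of this definition (and of the argument-order convention described at the top of this file), here is a minimal standalone sketch; the names are hypothetical and this is not the library's actual declaration.

```lean
import Mathlib

/-- Hypothetical standalone version of the counting operation: the number of
times the action process `A` selects `a` strictly before time `t`, at sample
point `ω`. -/
def pullCountSketch {Ω α : Type*} [DecidableEq α]
    (A : ℕ → Ω → α) (a : α) (t : ℕ) (ω : Ω) : ℕ :=
  ((Finset.range t).filter (fun s => A s ω = a)).card

-- Deterministic toy example on `Ω = Unit`: the action alternates 0, 1, 0, 1, 0.
#eval pullCountSketch (fun n (_ : Unit) => n % 2) 0 5 ()  -- expected: 3
```

Partially applying the static arguments, `pullCountSketch A a : ℕ → Ω → ℕ` is a time-indexed family of functions on the sample space, which is the process view mentioned above.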
Number of pulls of arm a up to (and including) time n.
This is the number of entries in h in which the arm is a.
Equations
- Learning.pullCount' n h a = {s : ↥(Finset.Iic n) | (h s).1 = a}.card
Instances For
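A minimal standalone sketch of this history-based count, assuming hypothetical names and using `Finset.filter` in place of the set-builder form printed above:

```lean
import Mathlib

/-- Hypothetical standalone version of the history-based count: the number of
entries of the history `h` up to and including time `n` whose action is `a`. -/
def pullCountSketch' {α R : Type*} [DecidableEq α]
    (n : ℕ) (h : ℕ → α × R) (a : α) : ℕ :=
  ((Finset.Iic n).filter (fun s => (h s).1 = a)).card

-- History: action `s % 2` with reward `1` at every step; arm 0 is played at times 0, 2, 4.
#eval pullCountSketch' 4 (fun s => (s % 2, (1 : ℤ))) 0  -- expected: 3
```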
Number of steps until action a was pulled exactly m times.
Equations
Instances For
If we pull action a at time 0, the first time at which it is pulled once is 0.
stepsUntil a m is a stopping time with respect to the filtration filtrationAction.
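Since no equation is printed for `stepsUntil` above, the following is only a hedged sketch of the intended behaviour, restricted to a finite horizon `T` so that it stays computable; the names are hypothetical and the library's actual definition may differ.

```lean
import Mathlib

/-- Hypothetical sketch of "first time at which arm `a` has been pulled `m` times":
the least `n` such that the number of pulls of `a` among times `0, …, n` equals `m`,
or `none` if this does not happen before the horizon `T`. -/
def stepsUntilSketch {Ω α : Type*} [DecidableEq α]
    (A : ℕ → Ω → α) (a : α) (m : ℕ) (T : ℕ) (ω : Ω) : Option ℕ :=
  (List.range T).find? (fun n =>
    ((Finset.range (n + 1)).filter (fun s => A s ω = a)).card == m)

-- Arm 0 is chosen at even times; it has been pulled twice for the first time at time 2.
#eval stepsUntilSketch (fun n (_ : Unit) => n % 2) 0 2 10 ()  -- expected: some 2
-- It is pulled at time 0, so the first time at which it has been pulled once is 0.
#eval stepsUntilSketch (fun n (_ : Unit) => n % 2) 0 1 10 ()  -- expected: some 0
```

The second evaluation illustrates the statement above: if arm `0` is pulled at time `0`, the first time at which it has been pulled once is `0`.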
Reward obtained when pulling action a for the m-th time.
If it is never pulled m times, the reward is given by the second component of ω, which in
applications will be independent, with the same law.
Equations
- Learning.rewardByCount A R' a m ω = match Learning.stepsUntil A a m ω.1 with | none => ω.2 m a | some n => R' n ω.1
Instances For
The value at m = 0 does not matter (it would be the "zeroth" reward); it should be considered a junk value.
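A hedged standalone sketch of this bookkeeping, with hypothetical names, a finite search horizon in place of `stepsUntil`, and rewards specialised to `ℚ`:

```lean
import Mathlib

/-- Hypothetical sketch of "reward at the `m`-th pull of `a`", on a finite horizon `T`.
The sample point is a pair: a base point `ω.1` driving actions and rewards, and a
fallback table `ω.2` used when the arm is never pulled `m` times. -/
def rewardByCountSketch {Ω α : Type*} [DecidableEq α]
    (A : ℕ → Ω → α) (R' : ℕ → Ω → ℚ) (T : ℕ)
    (a : α) (m : ℕ) (ω : Ω × (ℕ → α → ℚ)) : ℚ :=
  match (List.range T).find? (fun n =>
      ((Finset.range (n + 1)).filter (fun s => A s ω.1 = a)).card == m) with
  | none => ω.2 m a     -- arm `a` never reaches `m` pulls: use the fallback stream
  | some n => R' n ω.1  -- reward observed at the step of the `m`-th pull

-- Arm 0 is pulled at times 0, 2, 4, … and the reward at time `n` is `n`.
-- Its 2nd pull happens at time 2, so the associated reward is 2.
#eval rewardByCountSketch (fun n (_ : Unit) => n % 2) (fun n _ => (n : ℚ)) 10 0 2
  ((), fun _ _ => 0)  -- expected: 2
```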
Sum of rewards obtained when pulling action a up to time t (exclusive).
Equations
- Learning.sumRewards A R' a t ω = ∑ s ∈ Finset.range t, if A s ω = a then R' s ω else 0
Instances For
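A minimal standalone sketch of this sum, with hypothetical names and rewards specialised to `ℚ`:

```lean
import Mathlib

/-- Hypothetical standalone sketch: total reward collected from arm `a` strictly
before time `t`. -/
def sumRewardsSketch {Ω α : Type*} [DecidableEq α]
    (A : ℕ → Ω → α) (R' : ℕ → Ω → ℚ) (a : α) (t : ℕ) (ω : Ω) : ℚ :=
  ∑ s ∈ Finset.range t, if A s ω = a then R' s ω else 0

-- Arm 0 is pulled at times 0, 2, 4 with reward equal to the time, so 0 + 2 + 4 = 6.
#eval sumRewardsSketch (fun n (_ : Unit) => n % 2) (fun n _ => (n : ℚ)) 0 6 ()  -- expected: 6
```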
Sum of rewards of arm a up to (and including) time n.
Equations
- Learning.sumRewards' n h a = ∑ s : ↥(Finset.Iic n), if (h s).1 = a then (h s).2 else 0
Instances For
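The history-based sum admits the same kind of sketch (hypothetical names, rewards specialised to `ℚ`, summing over the `Finset` rather than the coerced subtype):

```lean
import Mathlib

/-- Hypothetical standalone sketch: total reward recorded for arm `a` in the
history `h` up to and including time `n`. -/
def sumRewardsSketch' {α : Type*} [DecidableEq α]
    (n : ℕ) (h : ℕ → α × ℚ) (a : α) : ℚ :=
  ∑ s ∈ Finset.Iic n, if (h s).1 = a then (h s).2 else 0

-- History: arm `s % 2` with reward `s` at step `s`; arm 0 collects 0 + 2 + 4 = 6 up to time 5.
#eval sumRewardsSketch' 5 (fun s => (s % 2, (s : ℚ))) 0  -- expected: 6
```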
Empirical mean reward obtained when pulling action a up to time t (exclusive).
Equations
- Learning.empMean A R' a t ω = Learning.sumRewards A R' a t ω / ↑(Learning.pullCount A a t ω)
Instances For
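A minimal standalone sketch (hypothetical names, rewards specialised to `ℚ`): the empirical mean is the sum of rewards divided by the cast pull count.

```lean
import Mathlib

/-- Hypothetical standalone sketch: empirical mean reward of arm `a` strictly
before time `t`, i.e. total reward divided by the number of pulls. -/
def empMeanSketch {Ω α : Type*} [DecidableEq α]
    (A : ℕ → Ω → α) (R' : ℕ → Ω → ℚ) (a : α) (t : ℕ) (ω : Ω) : ℚ :=
  (∑ s ∈ Finset.range t, if A s ω = a then R' s ω else 0) /
    (((Finset.range t).filter (fun s => A s ω = a)).card : ℚ)

-- Arm 0 is pulled 3 times before time 6 with rewards 0, 2, 4, so its empirical mean is 2.
#eval empMeanSketch (fun n (_ : Unit) => n % 2) (fun n _ => (n : ℚ)) 0 6 ()  -- expected: 2
```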
Empirical mean of arm a at time n.
Equations
- Learning.empMean' n h a = Learning.sumRewards' n h a / ↑(Learning.pullCount' n h a)
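A sketch of the history-based empirical mean under the same assumptions; the second evaluation shows that, at least for `ℚ`-valued rewards, an arm that never appears in the history gets empirical mean `0`, since division by zero is zero.

```lean
import Mathlib

/-- Hypothetical standalone sketch of the history-based empirical mean
(rewards specialised to `ℚ`). -/
def empMeanSketch' {α : Type*} [DecidableEq α]
    (n : ℕ) (h : ℕ → α × ℚ) (a : α) : ℚ :=
  (∑ s ∈ Finset.Iic n, if (h s).1 = a then (h s).2 else 0) /
    (((Finset.Iic n).filter (fun s => (h s).1 = a)).card : ℚ)

-- Arm 0 appears at times 0, 2, 4 with rewards 0, 2, 4: empirical mean 2.
#eval empMeanSketch' 5 (fun s => (s % 2, (s : ℚ))) 0  -- expected: 2
-- An arm that never appears gets `0 / 0 = 0` under `ℚ`'s division convention.
#eval empMeanSketch' 5 (fun s => (s % 2, (s : ℚ))) 7  -- expected: 0
```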