
LeanBandits.SequentialLearning.FiniteActions

Bookkeeping definitions for finite action space sequential learning problems #

If the number of actions is finite, it makes sense to define the number of times each action was chosen, the time at which an action was chosen for the n-th time, the reward observed at that time, the sum of rewards obtained for each action, the empirical mean reward of each action, and so on.

For each definition that takes as arguments a time t : ℕ, a history h : ℕ → α × R, and possibly other parameters, we put the time and history last, in that order, so that the definition can be seen as a stochastic process indexed by the time t on the measurable space ℕ → α × R. For example, pullCount A a : ℕ → Ω → ℕ below is such a time-indexed process.

noncomputable def Learning.pullCount {α : Type u_1} {Ω : Type u_3} [DecidableEq α] (A : ℕ → Ω → α) (a : α) (t : ℕ) (ω : Ω) :
ℕ

Number of times action a was chosen up to time t (excluding t).

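A minimal standalone sketch of this definition (the name pullCountSketch, the use of plain Nat and BEq, and the restriction to a single trajectory A : Nat → α instead of A : ℕ → Ω → α are ours, not the library's): count the steps s < t at which the trajectory chose a.

def pullCountSketch {α : Type} [BEq α] (A : Nat → α) (a : α) (t : Nat) : Nat :=
  -- keep the steps s in {0, …, t - 1} whose action is a, and count them
  ((List.range t).filter (fun s => A s == a)).length

-- On the trajectory a, b, a, b, …, action "a" is chosen twice before time 3.
#eval pullCountSketch (fun s => if s % 2 = 0 then "a" else "b") "a" 3  -- 2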
noncomputable def Learning.pullCount' {α : Type u_1} {R : Type u_2} [DecidableEq α] (n : ℕ) (h : ↑(Finset.Iic n) → α × R) (a : α) :
ℕ

Number of pulls of arm a up to (and including) time n. This is the number of entries in h in which the arm is a.

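A matching sketch of pullCount', where for simplicity the history is a plain function h : Nat → α × R read on the indices 0, …, n rather than a function on the subtype ↑(Finset.Iic n):

def pullCount'Sketch {α R : Type} [BEq α] (n : Nat) (h : Nat → α × R) (a : α) : Nat :=
  -- count the recorded steps 0, …, n whose action component is a
  ((List.range (n + 1)).filter (fun i => (h i).1 == a)).length

-- History (a, 0), (b, 1), (a, 2): arm "a" appears twice up to time 2.
#eval pullCount'Sketch 2 (fun i => (if i % 2 = 0 then "a" else "b", i)) "a"  -- 2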
@[simp]
theorem Learning.pullCount_zero {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) :
pullCount A a 0 = 0
theorem Learning.pullCount_zero_apply {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (ω : Ω) :
pullCount A a 0 ω = 0
theorem Learning.pullCount_one {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {ω : Ω} :
pullCount A a 1 ω = if A 0 ω = a then 1 else 0
theorem Learning.monotone_pullCount {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (ω : Ω) :
Monotone fun (x : ℕ) => pullCount A a x ω
theorem Learning.pullCount_mono {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) {n m : ℕ} (hnm : n ≤ m) (ω : Ω) :
pullCount A a n ω ≤ pullCount A a m ω
theorem Learning.pullCount_action_eq_pullCount_add_one {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (t : ℕ) (ω : Ω) :
pullCount A (A t ω) (t + 1) ω = pullCount A (A t ω) t ω + 1
theorem Learning.pullCount_eq_pullCount_of_action_ne {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {t : ℕ} {ω : Ω} (ha : A t ω ≠ a) :
pullCount A a (t + 1) ω = pullCount A a t ω
theorem Learning.pullCount_add_one {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {t : ℕ} {ω : Ω} :
pullCount A a (t + 1) ω = pullCount A a t ω + if A t ω = a then 1 else 0
theorem Learning.pullCount_eq_sum {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (t : ℕ) (ω : Ω) :
pullCount A a t ω = ∑ s ∈ Finset.range t, if A s ω = a then 1 else 0
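A quick numeric check of this identity on pullCountSketch from above, with a foldl playing the role of the indicator sum over Finset.range t:

-- Both sides of pullCount_eq_sum on the trajectory a, b, a, b, … at t = 5.
#eval pullCountSketch (fun s => if s % 2 = 0 then "a" else "b") "a" 5          -- 3
#eval (List.range 5).foldl (fun acc s => acc + if s % 2 = 0 then 1 else 0) 0  -- 3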
theorem Learning.pullCount'_eq_sum {α : Type u_1} {R : Type u_2} [DecidableEq α] (n : ℕ) (h : ↑(Finset.Iic n) → α × R) (a : α) :
pullCount' n h a = ∑ s : ↑(Finset.Iic n), if (h s).1 = a then 1 else 0
theorem Learning.pullCount_add_one_eq_pullCount' {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {n : ℕ} {ω : Ω} :
pullCount A a (n + 1) ω = pullCount' n (fun (i : ↑(Finset.Iic n)) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.pullCount_eq_pullCount' {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {n : ℕ} {ω : Ω} (hn : n ≠ 0) :
pullCount A a n ω = pullCount' (n - 1) (fun (i : ↑(Finset.Iic (n - 1))) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.pullCount'_mono {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {ω : Ω} {n m : ℕ} (hnm : n ≤ m) :
pullCount' n (fun (i : ↑(Finset.Iic n)) => (A (↑i) ω, R' (↑i) ω)) a ≤ pullCount' m (fun (i : ↑(Finset.Iic m)) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.pullCount_le {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (t : ℕ) (ω : Ω) :
pullCount A a t ω ≤ t
theorem Learning.pullCount_congr {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {n : ℕ} {ω ω' : Ω} (h_eq : ∀ i ≤ n, A i ω = A i ω') :
pullCount A a (n + 1) ω = pullCount A a (n + 1) ω'
theorem Learning.pullCount_lt_of_forall_ne {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {n t : ℕ} {ω : Ω} (h_lt : ∀ (s : ℕ), pullCount A a (s + 1) ω ≠ t) (ht : t ≠ 0) :
pullCount A a n ω < t
theorem Learning.exists_pullCount_eq_of_le {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {n t : ℕ} {ω : Ω} (hnm : t ≤ pullCount A a (n + 1) ω) (ht : t ≠ 0) :
∃ (s : ℕ), pullCount A a (s + 1) ω = t
theorem Learning.pullCount_le_add {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (n C : ℕ) (ω : Ω) :
pullCount A a n ω ≤ C + 1 + ∑ s ∈ Finset.range n, {s : ℕ | A s ω = a ∧ C < pullCount A a s ω}.indicator 1 s
theorem Learning.measurable_pullCount {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (a : α) (t : ℕ) :
Measurable fun (ω : Ω) => pullCount A a t ω
theorem Learning.measurable_uncurry_pullCount {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableEq α] (hA : ∀ (n : ℕ), Measurable (A n)) (t : ℕ) :
Measurable fun (p : Ω × α) => pullCount A p.2 t p.1
theorem Learning.measurable_pullCount' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [DecidableEq α] [MeasurableSingletonClass α] (n : ℕ) (a : α) :
Measurable fun (h : ↑(Finset.Iic n) → α × R) => pullCount' n h a
theorem Learning.measurable_uncurry_pullCount' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [DecidableEq α] [MeasurableEq α] (n : ℕ) :
Measurable fun (p : (↑(Finset.Iic n) → α × R) × α) => pullCount' n p.1 p.2
theorem Learning.adapted_pullCount_add_one' {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (n : ℕ) :
Measurable (pullCount A a (n + 1))
theorem Learning.adapted_pullCount_add_one {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) :
theorem Learning.isPredictable_pullCount {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) :
theorem Learning.integrable_pullCount {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (a : α) (n : ℕ) :
MeasureTheory.Integrable (fun (ω : Ω) => ↑(pullCount A a n ω)) P
noncomputable def Learning.stepsUntil {α : Type u_1} {Ω : Type u_3} [DecidableEq α] (A : ℕ → Ω → α) (a : α) (m : ℕ) (ω : Ω) :
ℕ∞

Number of steps until action a was pulled exactly m times.

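A computable sketch of stepsUntil, reusing pullCountSketch from above. The library's definition is ℕ∞-valued and returns ⊤ when action a is never pulled m times (see stepsUntil_eq_dite below); to stay computable, this sketch only searches below an explicit horizon, which is our addition, and returns none instead of ⊤:

def stepsUntilSketch {α : Type} [BEq α] (A : Nat → α) (a : α) (m horizon : Nat) :
    Option Nat :=
  -- first time t < horizon at which a has been pulled exactly m times up to t (inclusive)
  (List.range horizon).find? (fun t => pullCountSketch A a (t + 1) == m)

-- On a, b, a, b, …, the second pull of "a" happens at time 2,
-- while "b" is never pulled three times before the horizon 4.
#eval stepsUntilSketch (fun s => if s % 2 = 0 then "a" else "b") "a" 2 10  -- some 2
#eval stepsUntilSketch (fun s => if s % 2 = 0 then "a" else "b") "b" 3 4   -- none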
theorem Learning.stepsUntil_eq_top_iff {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} :
stepsUntil A a m ω = ⊤ ↔ ∀ (s : ℕ), pullCount A a (s + 1) ω ≠ m
theorem Learning.stepsUntil_ne_top {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (h_exists : ∃ (s : ℕ), pullCount A a (s + 1) ω = m) :
stepsUntil A a m ω ≠ ⊤
theorem Learning.exists_pullCount_eq {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (h' : stepsUntil A a m ω ≠ ⊤) :
∃ (s : ℕ), pullCount A a (s + 1) ω = m
theorem Learning.stepsUntil_zero_of_ne {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {ω : Ω} (hka : A 0 ω ≠ a) :
stepsUntil A a 0 ω = 0
theorem Learning.stepsUntil_zero_of_eq {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {ω : Ω} (hka : A 0 ω = a) :
stepsUntil A a 0 ω = ⊤
theorem Learning.stepsUntil_eq_dite {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (m : ℕ) (ω : Ω) [Decidable (∃ (s : ℕ), pullCount A a (s + 1) ω = m)] :
stepsUntil A a m ω = if h : ∃ (s : ℕ), pullCount A a (s + 1) ω = m then ↑(Nat.find h) else ⊤
theorem Learning.stepsUntil_eq_leastGE {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {m : ℕ} (a : α) (hm : m ≠ 0) :
stepsUntil A a m = MeasureTheory.leastGE (fun (n : ℕ) (ω : Ω) => ↑(pullCount A a (n + 1) ω)) ↑m
theorem Learning.stepsUntil_mono {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) (ω : Ω) {n m : ℕ} (hn : n ≠ 0) (hnm : n ≤ m) :
stepsUntil A a n ω ≤ stepsUntil A a m ω
theorem Learning.stepsUntil_pullCount_le {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (ω : Ω) (a : α) (t : ℕ) :
stepsUntil A a (pullCount A a (t + 1) ω) ω ≤ ↑t
theorem Learning.stepsUntil_pullCount_eq {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (ω : Ω) (t : ℕ) :
stepsUntil A (A t ω) (pullCount A (A t ω) (t + 1) ω) ω = ↑t
theorem Learning.stepsUntil_one_of_eq {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {ω : Ω} (hka : A 0 ω = a) :
stepsUntil A a 1 ω = 0

If we pull action a at time 0, the first time at which it is pulled once is 0.

theorem Learning.stepsUntil_eq_zero_iff {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} :
stepsUntil A a m ω = 0 ↔ m = 0 ∧ A 0 ω ≠ a ∨ m = 1 ∧ A 0 ω = a
theorem Learning.action_stepsUntil {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (hm : m ≠ 0) (h_exists : ∃ (s : ℕ), pullCount A a (s + 1) ω = m) :
A (stepsUntil A a m ω).toNat ω = a
theorem Learning.action_eq_of_stepsUntil_eq_coe {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m n : ℕ} {ω : Ω} (hm : m ≠ 0) (h : stepsUntil A a m ω = ↑n) :
A n ω = a
theorem Learning.pullCount_stepsUntil_add_one {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (h_exists : ∃ (s : ℕ), pullCount A a (s + 1) ω = m) :
pullCount A a (stepsUntil A a m ω + 1).toNat ω = m
theorem Learning.pullCount_stepsUntil {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (hm : m ≠ 0) (h_exists : ∃ (s : ℕ), pullCount A a (s + 1) ω = m) :
pullCount A a (stepsUntil A a m ω).toNat ω = m - 1
theorem Learning.pullCount_lt_of_le_stepsUntil {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} (a : α) {n m : ℕ} (ω : Ω) (h_exists : ∃ (s : ℕ), pullCount A a (s + 1) ω = m) (hn : ↑n < stepsUntil A a m ω) :
pullCount A a (n + 1) ω < m
theorem Learning.pullCount_eq_of_stepsUntil_eq_coe {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m n : ℕ} {ω : Ω} (hm : m ≠ 0) (h : stepsUntil A a m ω = ↑n) :
pullCount A a n ω = m - 1
theorem Learning.pullCount_add_one_eq_of_stepsUntil_eq_coe {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m n : ℕ} {ω : Ω} (h : stepsUntil A a m ω = ↑n) :
pullCount A a (n + 1) ω = m
theorem Learning.stepsUntil_eq_iff {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (n : ℕ) :
stepsUntil A a m ω = ↑n ↔ pullCount A a (n + 1) ω = m ∧ ∀ k < n, pullCount A a (k + 1) ω < m
theorem Learning.stepsUntil_eq_iff' {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m : ℕ} {ω : Ω} (hm : m ≠ 0) (n : ℕ) :
stepsUntil A a m ω = ↑n ↔ A n ω = a ∧ pullCount A a n ω = m - 1
theorem Learning.stepsUntil_eq_congr {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {m n : ℕ} {ω ω' : Ω} (h_eq : ∀ i ≤ n, A i ω = A i ω') :
stepsUntil A a m ω = ↑n ↔ stepsUntil A a m ω' = ↑n
theorem Learning.isStoppingTime_stepsUntil {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {m : ℕ} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (hm : m ≠ 0) :
theorem Learning.measurable_stepsUntil {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (a : α) (m : ℕ) :
Measurable (stepsUntil A a m)
theorem Learning.measurable_stepsUntil' {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (a : α) (m : ℕ) :
Measurable fun (ω : Ω × (ℕ → α → R)) => stepsUntil A a m ω.1
theorem Learning.measurable_comap_indicator_stepsUntil_eq {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (m n : ℕ) :
Measurable ({ω : Ω | stepsUntil A a m ω = ↑n}.indicator fun (x : Ω) => 1)
theorem Learning.measurable_indicator_stepsUntil_eq {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (m n : ℕ) :
Measurable ({ω : Ω | stepsUntil A a m ω = ↑n}.indicator fun (x : Ω) => 1)
theorem Learning.measurableSet_stepsUntil_eq_zero {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (a : α) (m : ℕ) :
MeasurableSet {ω : Ω | stepsUntil A a m ω = 0}
theorem Learning.measurable_comap_indicator_stepsUntil_eq_zero {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable ({ω : Ω | stepsUntil A a m ω = 0}.indicator fun (x : Ω) => 1)
theorem Learning.measurableSet_stepsUntil_eq {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (m n : ℕ) :
MeasurableSet {ω : Ω | stepsUntil A a m ω = ↑n}
theorem Learning.isStoppingTime_stepsUntil_filtrationAction {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (m : ℕ) :

stepsUntil a m is a stopping time with respect to the filtration filtrationAction.

noncomputable def Learning.rewardByCount {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] (A : ℕ → Ω → α) (R' : ℕ → Ω → R) (a : α) (m : ℕ) (ω : Ω × (ℕ → α → R)) :
R

Reward obtained when pulling action a for the m-th time. If action a is never pulled m times, the reward is given by the second component of ω, which in applications will be independent with the same law.

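A sketch of rewardByCount built on the sketches above and mirroring rewardByCount_eq_ite below: read the reward stream at the time of the m-th pull when that time exists, and otherwise fall back to an auxiliary table playing the role of the second component of ω (the Float reward type and the horizon are our simplifications):

def rewardByCountSketch {α : Type} [BEq α] (A : Nat → α) (R' : Nat → Float)
    (fallback : Nat → α → Float) (a : α) (m horizon : Nat) : Float :=
  match stepsUntilSketch A a m horizon with
  | some t => R' t          -- reward observed at the m-th pull of a
  | none   => fallback m a  -- a is never pulled m times: auxiliary/junk value

-- On a, b, a, b, … with R' t = t, the second pull of "a" is at time 2, reward 2.0.
#eval rewardByCountSketch (fun s => if s % 2 = 0 then "a" else "b")
    (fun t => t.toFloat) (fun _ _ => 0.0) "a" 2 10  -- 2.000000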
theorem Learning.rewardByCount_eq_ite {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} (a : α) (m : ℕ) (ω : Ω × (ℕ → α → R)) :
rewardByCount A R' a m ω = if stepsUntil A a m ω.1 = ⊤ then ω.2 m a else R' (stepsUntil A a m ω.1).toNat ω.1
theorem Learning.rewardByCount_eq_add {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [AddMonoid R] (a : α) (m : ℕ) :
rewardByCount A R' a m = ({ω : Ω × (ℕ → α → R) | stepsUntil A a m ω.1 ≠ ⊤}.indicator fun (ω : Ω × (ℕ → α → R)) => R' (stepsUntil A a m ω.1).toNat ω.1) + {ω : Ω × (ℕ → α → R) | stepsUntil A a m ω.1 = ⊤}.indicator fun (ω : Ω × (ℕ → α → R)) => ω.2 m a
theorem Learning.rewardByCount_of_stepsUntil_eq_top {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {m : ℕ} {ω : Ω × (ℕ → α → R)} (h : stepsUntil A a m ω.1 = ⊤) :
rewardByCount A R' a m ω = ω.2 m a
theorem Learning.rewardByCount_of_stepsUntil_ne_top {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {m : ℕ} {ω : Ω × (ℕ → α → R)} (h : stepsUntil A a m ω.1 ≠ ⊤) :
rewardByCount A R' a m ω = R' (stepsUntil A a m ω.1).toNat ω.1
theorem Learning.rewardByCount_eq_stoppedValue {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {m : ℕ} {ω : Ω × (ℕ → α → R)} (h : stepsUntil A a m ω.1 ≠ ⊤) :
theorem Learning.rewardByCount_of_stepsUntil_eq_coe {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} {a : α} {m n : ℕ} {ω : Ω × (ℕ → α → R)} (h : stepsUntil A a m ω.1 = ↑n) :
rewardByCount A R' a m ω = R' n ω.1
@[simp]
theorem Learning.rewardByCount_zero {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} (a : α) (ω : Ω × (ℕ → α → R)) :
rewardByCount A R' a 0 ω = if A 0 ω.1 = a then ω.2 0 a else R' 0 ω.1

The value at 0 does not matter (it would be the "zeroth" reward). It should be considered a junk value.

theorem Learning.rewardByCount_pullCount_add_one_eq_reward {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} (t : ℕ) (ω : Ω × (ℕ → α → R)) :
rewardByCount A R' (A t ω.1) (pullCount A (A t ω.1) t ω.1 + 1) ω = R' t ω.1
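This identity can be checked numerically on the sketches: at time t = 2 the trajectory below plays "a", at that point for the (pullCount + 1) = 2nd time, and rewardByCountSketch recovers the time-2 reward:

#eval
  let A : Nat → String := fun s => if s % 2 = 0 then "a" else "b"
  rewardByCountSketch A (fun t => t.toFloat) (fun _ _ => 0.0) "a"
    (pullCountSketch A "a" 2 + 1) 10  -- 2.000000, i.e. R' 2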
theorem Learning.measurable_rewardByCount {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → R} [MeasurableSingletonClass α] (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (m : ℕ) :
Measurable fun (ω : Ω × (ℕ → α → R)) => rewardByCount A R' a m ω
theorem Learning.sum_pullCount_mul {α : Type u_1} {R : Type u_2} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} [Fintype α] [Semiring R] (ω : Ω) (f : α → R) (t : ℕ) :
∑ a : α, ↑(pullCount A a t ω) * f a = ∑ s ∈ Finset.range t, f (A s ω)
theorem Learning.sum_pullCount {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {t : ℕ} [Fintype α] {ω : Ω} :
∑ a : α, pullCount A a t ω = t
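A numeric check of sum_pullCount on pullCountSketch: the pull counts of the two actions of the trajectory add up to the time horizon t = 5:

#eval
  let A : Nat → String := fun s => if s % 2 = 0 then "a" else "b"
  pullCountSketch A "a" 5 + pullCountSketch A "b" 5  -- 5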
def Learning.sumRewards {α : Type u_1} {Ω : Type u_3} [DecidableEq α] (A : ℕ → Ω → α) (R' : ℕ → Ω → ℝ) (a : α) (t : ℕ) (ω : Ω) :
ℝ

Sum of rewards obtained when pulling action a up to time t (exclusive).

noncomputable def Learning.sumRewards' {α : Type u_1} [DecidableEq α] (n : ℕ) (h : ↑(Finset.Iic n) → α × ℝ) (a : α) :
ℝ

Sum of rewards of arm a up to (and including) time n.

noncomputable def Learning.empMean {α : Type u_1} {Ω : Type u_3} [DecidableEq α] (A : ℕ → Ω → α) (R' : ℕ → Ω → ℝ) (a : α) (t : ℕ) (ω : Ω) :
ℝ

Empirical mean reward obtained when pulling action a up to time t (exclusive).

noncomputable def Learning.empMean' {α : Type u_1} [DecidableEq α] (n : ℕ) (h : ↑(Finset.Iic n) → α × ℝ) (a : α) :
ℝ

Empirical mean of arm a at time n.

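Sketches of sumRewards and empMean in the same style (Float instead of ℝ, a single trajectory, hypothetical names). As with the library's empMean, the value when a was never pulled should be considered junk; here dividing by zero produces a Float NaN:

def sumRewardsSketch {α : Type} [BEq α] (A : Nat → α) (R' : Nat → Float)
    (a : α) (t : Nat) : Float :=
  -- total reward over the steps s < t at which a was chosen
  ((List.range t).filter (fun s => A s == a)).foldl (fun acc s => acc + R' s) 0.0

def empMeanSketch {α : Type} [BEq α] (A : Nat → α) (R' : Nat → Float)
    (a : α) (t : Nat) : Float :=
  sumRewardsSketch A R' a t / (pullCountSketch A a t).toFloat

-- On a, b, a, b, … with R' t = t, "a" is pulled at times 0, 2, 4 before t = 5,
-- so its empirical mean reward is (0 + 2 + 4) / 3 = 2.0.
#eval empMeanSketch (fun s => if s % 2 = 0 then "a" else "b") (fun t => t.toFloat) "a" 5  -- 2.000000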
theorem Learning.sumRewards_eq_pullCount_mul_empMean {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {t : ℕ} {R' : ℕ → Ω → ℝ} {ω : Ω} (h_pull : pullCount A a t ω ≠ 0) :
sumRewards A R' a t ω = ↑(pullCount A a t ω) * empMean A R' a t ω
theorem Learning.sum_rewardByCount_eq_sumRewards {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {R' : ℕ → Ω → ℝ} (a : α) (t : ℕ) (ω : Ω × (ℕ → α → ℝ)) :
∑ m ∈ Finset.Icc 1 (pullCount A a t ω.1), rewardByCount A R' a m ω = sumRewards A R' a t ω.1
theorem Learning.sumRewards_add_one_eq_sumRewards' {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {R' : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} :
sumRewards A R' a (n + 1) ω = sumRewards' n (fun (i : ↑(Finset.Iic n)) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.sumRewards_eq_sumRewards' {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {R' : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} (hn : n ≠ 0) :
sumRewards A R' a n ω = sumRewards' (n - 1) (fun (i : ↑(Finset.Iic (n - 1))) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.empMean_add_one_eq_empMean' {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {R' : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} :
empMean A R' a (n + 1) ω = empMean' n (fun (i : ↑(Finset.Iic n)) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.empMean_eq_empMean' {α : Type u_1} {Ω : Type u_3} [DecidableEq α] {A : ℕ → Ω → α} {a : α} {R' : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} (hn : n ≠ 0) :
empMean A R' a n ω = empMean' (n - 1) (fun (i : ↑(Finset.Iic (n - 1))) => (A (↑i) ω, R' (↑i) ω)) a
theorem Learning.measurable_sumRewards {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] {R' : ℕ → Ω → ℝ} (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (t : ℕ) :
Measurable (sumRewards A R' a t)
theorem Learning.measurable_empMean {α : Type u_1} {Ω : Type u_3} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} [DecidableEq α] {A : ℕ → Ω → α} [MeasurableSingletonClass α] {R' : ℕ → Ω → ℝ} (hA : ∀ (n : ℕ), Measurable (A n)) (hR' : ∀ (n : ℕ), Measurable (R' n)) (a : α) (n : ℕ) :
Measurable (empMean A R' a n)
theorem Learning.measurable_sumRewards' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (n : ℕ) (a : α) :
Measurable fun (h : ↑(Finset.Iic n) → α × ℝ) => sumRewards' n h a
theorem Learning.measurable_empMean' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (n : ℕ) (a : α) :
Measurable fun (h : ↑(Finset.Iic n) → α × ℝ) => empMean' n h a