Documentation

LeanBandits.RewardByCountMeasure

Laws of stepsUntil and rewardByCount #

theorem Bandits.measurable_pullCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (t : ℕ) :
Measurable fun (h : ℕ → α × ℝ) => pullCount a t h
theorem Bandits.measurable_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (h : ℕ → α × ℝ) => stepsUntil a m h
theorem Bandits.measurable_stepsUntil' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1
theorem Bandits.measurable_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2
theorem Bandits.hasLaw_Z {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => ω.2 m a) (ν a) (Bandit.measure alg ν)

Law of Y conditioned on the event s.

Equations
  • One or more equations did not get rendered due to their size.
Instances For

    Law of Y conditioned on the event that X is in s.

    Equations
    • One or more equations did not get rendered due to their size.
    Instances For

      Law of Y conditioned on the event that X equals x.

      Equations
      • One or more equations did not get rendered due to their size.
      Instances For
        theorem Bandits.condDistrib_reward'' {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] (n : ℕ) :
        𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1; Bandit.measure alg ν] =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) (Bandit.measure alg ν)] ν
        theorem Bandits.reward_cond_arm {α : Type u_1} {mα : MeasurableSpace α} [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] [Countable α] (a : α) (n : ℕ) (hμa : (MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) (Bandit.measure alg ν)) {a} ≠ 0) :
        𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1 in {a}; Bandit.measure alg ν] = ν a
        theorem Bandits.measurable_comap_indicator_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
        Measurable ({ω : ℕ → α × ℝ | stepsUntil a m ω = ↑n}.indicator fun (x : ℕ → α × ℝ) => 1)
        theorem Bandits.measurable_indicator_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
        Measurable ({ω : ℕ → α × ℝ | stepsUntil a m ω = ↑n}.indicator fun (x : ℕ → α × ℝ) => 1)
        theorem Bandits.measurableSet_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
        MeasurableSet {ω : ℕ → α × ℝ | stepsUntil a m ω = ↑n}
        theorem Bandits.condIndepFun_reward_stepsUntil_arm {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) :
        ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) mα) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1) ({ω : (ℕ → α × ℝ) × (ℕ → α → ℝ) | stepsUntil a m ω.1 = ↑n}.indicator fun (x : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => 1) (Bandit.measure alg ν)
        theorem Bandits.reward_cond_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) (hμn : (Bandit.measure alg ν) ((fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) ⁻¹' {↑n}) ≠ 0) :
        𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1 in {↑n}; Bandit.measure alg ν] = ν a
        theorem Bandits.condDistrib_rewardByCount_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
        𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1; Bandit.measure alg ν] =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) (Bandit.measure alg ν)] (ProbabilityTheory.Kernel.const ℕ∞ (ν a))
        theorem Bandits.hasLaw_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
        ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2) (ν a) (Bandit.measure alg ν)

        The reward received at the m-th pull of arm a has law ν a.

        theorem Bandits.identDistrib_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
        ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2) (Bandit.measure alg ν) (Bandit.measure alg ν)
        theorem Bandits.identDistrib_rewardByCount_id {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n : ℕ) (hn : n ≠ 0) :
        ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) id (Bandit.measure alg ν) (ν a)
        theorem Bandits.identDistrib_rewardByCount_eval {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) :
        ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) (fun (ω : ℕ → α → ℝ) => ω m a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)
        theorem Bandits.indepFun_rewardByCount_Iic {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (alg : Learning.Algorithm α ℝ) (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) :
        ProbabilityTheory.IndepFun (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a (n + 1) ω.1 ω.2) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (i : ↑(Finset.Iic n)) => rewardByCount a (↑i) ω.1 ω.2) (Bandit.measure alg ν)
        theorem Bandits.identDistrib_rewardByCount_stream' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) :
        ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => rewardByCount a (n + 1) ω.1 ω.2) (fun (ω : ℕ → α → ℝ) (n : ℕ) => ω n a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)
        theorem Bandits.identDistrib_rewardByCount_stream {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) :
        ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => rewardByCount a (n + 1) ω.1 ω.2) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => ω.2 n a) (Bandit.measure alg ν) (Bandit.measure alg ν)
        theorem Bandits.indepFun_rewardByCount_of_ne {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a b : α} (hab : a ≠ b) :
        ProbabilityTheory.IndepFun (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (s : ℕ) => rewardByCount a s ω.1 ω.2) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (s : ℕ) => rewardByCount b s ω.1 ω.2) (Bandit.measure alg ν)