Laws of stepsUntil and rewardByCount
theorem Bandits.measurable_pullCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (t : ℕ) :
    Measurable fun (h : ℕ → α × ℝ) => pullCount a t h

theorem Bandits.measurable_sumRewards {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (t : ℕ) :
    Measurable (sumRewards a t)

theorem Bandits.measurable_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (h : ℕ → α × ℝ) => stepsUntil a m h
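For orientation (the definitions are not restated on this page, so the following is an assumed reading, consistent with the names and with the laws proved below): pullCount a t h counts the pulls of arm a before time t, sumRewards a t h is the total reward collected from arm a before time t, and stepsUntil a m h is the (possibly infinite) time of the m-th pull of arm a. Writing $A_s$ and $R_s$ for the arm and reward at step $s$ of the history $h$:

$$N_a(t) = \sum_{s < t} \mathbf{1}\{A_s = a\}, \qquad S_a(t) = \sum_{s < t} \mathbf{1}\{A_s = a\}\, R_s, \qquad \tau_{a,m} = \inf\{n \mid N_a(n+1) = m\}.$$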
theorem Bandits.measurable_stepsUntil' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1

theorem Bandits.measurable_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2

theorem Bandits.hasLaw_Z {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
    ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => ω.2 m a) (ν a)
      (Bandit.measure alg ν)
Law of Y conditioned on the event s.
Equations not rendered due to their size.

Law of Y conditioned on the event that X is in s.
Equations not rendered due to their size.

Law of Y conditioned on the event that X equals x.
Equations not rendered due to their size.
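A minimal sketch of the intended construction, assuming the unrendered equations define these laws as pushforwards of conditioned measures (this is an assumed reading, not confirmed by this page; the notation matches the statements below):

$$\mathcal{L}[Y \mid s;\ \mu] = \operatorname{map} Y\ \mu[\cdot \mid s], \qquad \mathcal{L}[Y \mid X \in s;\ \mu] = \mathcal{L}[Y \mid X^{-1}(s);\ \mu], \qquad \mathcal{L}[Y \mid X = x;\ \mu] = \mathcal{L}[Y \mid X \in \{x\};\ \mu].$$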
theorem Bandits.condDistrib_reward'' {α : Type u_1} {mα : MeasurableSpace α}
    {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] (n : ℕ) :
    (statement not rendered)

theorem Bandits.reward_cond_arm {α : Type u_1} {mα : MeasurableSpace α}
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    [StandardBorelSpace α] [Nonempty α] [Countable α] (a : α) (n : ℕ)
    (hμa : (MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1)
      (Bandit.measure alg ν)) {a} ≠ 0) :
    (statement not rendered)

theorem Bandits.measurable_comap_indicator_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
    (statement not rendered)

theorem Bandits.measurable_indicator_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
    (statement not rendered)

theorem Bandits.measurableSet_stepsUntil_eq {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m n : ℕ) :
    MeasurableSet {ω : ℕ → α × ℝ | stepsUntil a m ω = ↑n}
theorem Bandits.condIndepFun_reward_stepsUntil_arm' {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) :
    ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (arm n) mα) ⋯ (reward n)
      ({ω : ℕ → α × ℝ | stepsUntil a m ω = ↑n}.indicator fun (x : ℕ → α × ℝ) => 1)
      (Bandit.trajMeasure alg ν)
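In standard notation, writing $A_n$ for arm n, $R_n$ for reward n and $\tau_{a,m}$ for stepsUntil a m, the statement says that, conditionally on the σ-algebra generated by $A_n$,

$$R_n \;\perp\; \mathbf{1}\{\tau_{a,m} = n\},$$

i.e. once the arm pulled at time n is known, the reward at time n gives no further information about whether time n is the m-th pull of arm a.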
theorem Bandits.condIndepFun_reward_stepsUntil_arm {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) :
    (statement not rendered)

theorem Bandits.reward_cond_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0)
    (hμn : (Bandit.measure alg ν)
      ((fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) ⁻¹' {↑n}) ≠ 0) :
    (statement not rendered)

theorem Bandits.condDistrib_rewardByCount_stepsUntil {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
    ⇑𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2 |
        fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1; Bandit.measure alg ν]
      =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1)
          (Bandit.measure alg ν)]
      ⇑(ProbabilityTheory.Kernel.const ℕ∞ (ν a))
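Unpacking the notation: conditionally on the time of the m-th pull of arm a, the reward collected at that pull has law ν a, regardless of the conditioning value. In symbols, with $R_{a,m}$ for rewardByCount a m and $\tau_{a,m}$ for stepsUntil a m,

$$\mathcal{L}\big[R_{a,m} \mid \tau_{a,m}\big] = \nu(a) \quad \text{a.e. with respect to the law of } \tau_{a,m}.$$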
theorem Bandits.hasLaw_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
    ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2)
      (ν a) (Bandit.measure alg ν)
The reward received at the m-th pull of arm a has law ν a.
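A minimal usage sketch, assuming Mathlib's ProbabilityTheory.HasLaw bundles the pushforward identity in a map_eq field (an assumption about the field name, not confirmed by this page):

open Bandits in
example {α : Type*} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α]
    {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α]
    (a : α) (m : ℕ) (hm : m ≠ 0) :
    MeasureTheory.Measure.map
      (fun ω : (ℕ → α × ℝ) × (ℕ → α → ℝ) => rewardByCount a m ω.1 ω.2)
      (Bandit.measure alg ν) = ν a :=
  -- `HasLaw X μ P` is assumed to record `P.map X = μ`; extract that equation.
  (hasLaw_rewardByCount a m hm).map_eq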
theorem Bandits.identDistrib_rewardByCount {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
    ProbabilityTheory.IdentDistrib
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2)
      (Bandit.measure alg ν) (Bandit.measure alg ν)

theorem Bandits.identDistrib_rewardByCount_id {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (n : ℕ) (hn : n ≠ 0) :
    ProbabilityTheory.IdentDistrib
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) id
      (Bandit.measure alg ν) (ν a)

theorem Bandits.identDistrib_rewardByCount_eval {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) :
    ProbabilityTheory.IdentDistrib
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (fun (ω : ℕ → α → ℝ) => ω m a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)
theorem Bandits.indepFun_rewardByCount_Iic {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (alg : Learning.Algorithm α ℝ)
    (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) :
    ProbabilityTheory.IndepFun
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a (n + 1) ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (i : ↥(Finset.Iic n)) => rewardByCount a (↑i) ω.1 ω.2)
      (Bandit.measure alg ν)

theorem Bandits.iIndepFun_rewardByCount' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (alg : Learning.Algorithm α ℝ)
    (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (a : α) :
    ProbabilityTheory.iIndepFun
      (fun (n : ℕ) (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (Bandit.measure alg ν)

theorem Bandits.iIndepFun_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (alg : Learning.Algorithm α ℝ)
    (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] :
    ProbabilityTheory.iIndepFun
      (fun (p : α × ℕ) (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount p.1 p.2 ω.1 ω.2)
      (Bandit.measure alg ν)
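A sketch of how the joint statement specializes to two distinct arm/count pairs, assuming Mathlib's ProbabilityTheory.iIndepFun.indepFun (joint independence gives pairwise independence at distinct indices):

open Bandits in
example {α : Type*} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α]
    (alg : Learning.Algorithm α ℝ) (ν : ProbabilityTheory.Kernel α ℝ)
    [ProbabilityTheory.IsMarkovKernel ν] {p q : α × ℕ} (hpq : p ≠ q) :
    ProbabilityTheory.IndepFun
      (fun ω : (ℕ → α × ℝ) × (ℕ → α → ℝ) => rewardByCount p.1 p.2 ω.1 ω.2)
      (fun ω : (ℕ → α × ℝ) × (ℕ → α → ℝ) => rewardByCount q.1 q.2 ω.1 ω.2)
      (Bandit.measure alg ν) :=
  -- Specialize the family-wise independence to the two indices `p ≠ q`.
  (iIndepFun_rewardByCount alg ν).indepFun hpq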
theorem Bandits.identDistrib_rewardByCount_stream' {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) :
    ProbabilityTheory.IdentDistrib
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => rewardByCount a (n + 1) ω.1 ω.2)
      (fun (ω : ℕ → α → ℝ) (n : ℕ) => ω n a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)

theorem Bandits.identDistrib_rewardByCount_stream {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) :
    ProbabilityTheory.IdentDistrib
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => rewardByCount a (n + 1) ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => ω.2 n a)
      (Bandit.measure alg ν) (Bandit.measure alg ν)
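Combined with the independence results above, these two lemmas say that the rewards of arm a, reindexed by pull count, form a sequence distributed like the i.i.d. stream with marginal ν a:

$$\big(R_{a,n+1}\big)_{n \ge 0} \overset{d}{=} \big(Z_n(a)\big)_{n \ge 0}, \qquad Z_n(a) \sim \nu(a) \ \text{i.i.d.},$$

which is the standard route for transferring almost-sure statements, such as the law of large numbers, from the stream to the bandit rewards.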
theorem Bandits.indepFun_rewardByCount_of_ne {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a b : α}
    (hab : a ≠ b) :
    ProbabilityTheory.IndepFun
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (s : ℕ) => rewardByCount a s ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (s : ℕ) => rewardByCount b s ω.1 ω.2)
      (Bandit.measure alg ν)