Laws of stepsUntil and rewardByCount #
theorem Bandits.measurable_pullCount {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (t : ℕ) :
    Measurable fun (h : ℕ → α × ℝ) => pullCount a t h
theorem Bandits.measurable_sumRewards {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (t : ℕ) :
    Measurable (sumRewards a t)
theorem Bandits.measurable_stepsUntil {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (h : ℕ → α × ℝ) => stepsUntil a m h
theorem Bandits.measurable_stepsUntil' {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1
theorem Bandits.measurable_rewardByCount {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
    Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2
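These measurability lemmas compose in the usual way. As a minimal sketch (an illustration, not part of the library: it assumes the declarations above are in scope and that Mathlib's Measurable.prodMk is available under that name; older Mathlib spells it Measurable.prod_mk), the pull count and summed reward of an arm are jointly measurable:

open Bandits in
example {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (a : α) (t : ℕ) :
    -- joint measurability of the pull count and the running reward sum of arm `a`
    Measurable fun (h : ℕ → α × ℝ) => (pullCount a t h, sumRewards a t h) :=
  (measurable_pullCount a t).prodMk (measurable_sumRewards a t)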
theorem Bandits.hasLaw_Z {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
    ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => ω.2 m a) (ν a)
      (Bandit.measure alg ν)
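Unpacking HasLaw: the pushforward of the bandit measure along ω ↦ ω.2 m a is exactly ν a, i.e. the m-th entry of the auxiliary reward stream for arm a is a fresh sample from arm a's reward distribution. A minimal usage sketch (assuming HasLaw.map_eq is the field giving the pushforward equality):

open Bandits in
example {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
    -- the law of the `m`-th auxiliary sample for arm `a` is `ν a`
    MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => ω.2 m a)
      (Bandit.measure alg ν) = ν a :=
  (hasLaw_Z a m).map_eq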
Law of Y conditioned on the event s.
Equations: not rendered due to their size.
Law of Y conditioned on the event that X is in s.
Equations: not rendered due to their size.
Law of Y conditioned on the event that X equals x.
Equations: not rendered due to their size.
Law of Y conditioned on X.
Equations: not rendered due to their size.
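The rendered equations for these four definitions are missing, but the intended objects are presumably the standard conditional laws. For an event of positive probability, the formula one expects is (a sketch of the usual convention, not the literal library definition):

𝓛[Y | X in s; μ] = MeasureTheory.Measure.map Y (μ[·| X ⁻¹' s]),

i.e. the pushforward under Y of μ conditioned on the event X ⁻¹' s. The last variant, 𝓛[Y | X; μ], conditions on X itself rather than on an event: it is a kernel which, for (map X μ)-almost every value x of X, gives the conditional distribution of Y given X = x. This is the notation used in condDistrib_rewardByCount_stepsUntil below.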
theorem Bandits.condDistrib_reward'' {α : Type u_1} {mα : MeasurableSpace α}
    {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] (n : ℕ) :
theorem Bandits.reward_cond_arm {α : Type u_1} {mα : MeasurableSpace α}
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] [Countable α]
    (a : α) (n : ℕ)
    (hμa : (MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1)
      (Bandit.measure alg ν)) {a} ≠ 0) :
theorem Bandits.condIndepFun_reward_stepsUntil_arm {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α]
    [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) :
theorem Bandits.reward_cond_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Countable α] [Nonempty α]
    (a : α) (m n : ℕ) (hm : m ≠ 0)
    (hμn : (Bandit.measure alg ν)
      ((fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) ⁻¹' {↑n}) ≠ 0) :
theorem Bandits.condDistrib_rewardByCount_stepsUntil {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
    ⇑𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2 |
        fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1; Bandit.measure alg ν]
      =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1)
        (Bandit.measure alg ν)] ⇑(ProbabilityTheory.Kernel.const ℕ∞ (ν a))
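In words: conditioning on the (possibly infinite) time stepsUntil a m at which arm a is pulled for the m-th time does not change the law of the corresponding reward. Writing P for Bandit.measure alg ν, T for stepsUntil a m and R for rewardByCount a m (the abbreviations are ours), the statement reads

𝓛[R | T; P] t = ν a   for (MeasureTheory.Measure.map T P)-almost every t : ℕ∞,

which is exactly the constant kernel on the right-hand side.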
theorem Bandits.hasLaw_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α]
    (a : α) (m : ℕ) (hm : m ≠ 0) :
    ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2)
      (ν a) (Bandit.measure alg ν)
The reward received at the m-th pull of arm a has law ν a.
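One consequence worth recording (a sketch, assuming ν a has a first moment; it combines the pushforward equality from HasLaw with MeasureTheory.integral_map and the measurability lemma measurable_rewardByCount above): the expected reward at the m-th pull of arm a is the mean of ν a,

∫ ω, rewardByCount a m ω.1 ω.2 ∂(Bandit.measure alg ν) = ∫ x, x ∂(ν a).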
theorem Bandits.identDistrib_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α]
    (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
    ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2) (Bandit.measure alg ν)
      (Bandit.measure alg ν)
theorem Bandits.identDistrib_rewardByCount_id {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (n : ℕ) (hn : n ≠ 0) :
    ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      id (Bandit.measure alg ν) (ν a)
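For instance, the two-sided identDistrib_rewardByCount above can presumably be recovered from this one by symmetry and transitivity of IdentDistrib (a sketch, assuming the statements as displayed):

open Bandits in
example {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α]
    {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α]
    (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
    ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2)
      (Bandit.measure alg ν) (Bandit.measure alg ν) :=
  -- both sides have law `ν a`, so they are identically distributed
  (identDistrib_rewardByCount_id a n hn).trans (identDistrib_rewardByCount_id a m hm).symm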
theorem Bandits.identDistrib_rewardByCount_eval {α : Type u_1} {mα : MeasurableSpace α}
    [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α]
    [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) :
    ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2)
      (fun (ω : ℕ → α → ℝ) => ω m a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)
theorem Bandits.iIndepFun_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α]
    [MeasurableSingletonClass α] (alg : Learning.Algorithm α ℝ) (ν : ProbabilityTheory.Kernel α ℝ)
    [ProbabilityTheory.IsMarkovKernel ν] :
    ProbabilityTheory.iIndepFun
      (fun (p : α × ℕ) (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount p.1 p.2 ω.1 ω.2)
      (Bandit.measure alg ν)
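Taken together, hasLaw_rewardByCount, identDistrib_rewardByCount and iIndepFun_rewardByCount say, informally, that

(rewardByCount a m)_{m ≥ 1} is i.i.d. with common law ν a, independently across arms,

whatever the algorithm alg does. This is the reduction that lets concentration results for i.i.d. samples be applied to the rewards a bandit algorithm actually collects.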