Laws of stepsUntil and rewardByCount #
theorem
Bandits.hasLaw_Z
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(m : ℕ)
:
ProbabilityTheory.HasLaw (fun (ω : Ω × (ℕ → α → ℝ)) => ω.2 m a) (ν a) (P.prod (Bandit.streamMeasure ν))
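For illustration, a minimal usage sketch, assuming the map_eq field of Mathlib's ProbabilityTheory.HasLaw structure, which unpacks a HasLaw statement into an equality of pushforward measures:

example {α Ω : Type*} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω}
    {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    (a : α) (m : ℕ) :
    -- The law of the (m, a)-entry of the reward stream, as a map equation.
    MeasureTheory.Measure.map (fun ω : Ω × (ℕ → α → ℝ) => ω.2 m a)
      (P.prod (Bandit.streamMeasure ν)) = ν a :=
  (Bandits.hasLaw_Z a m).map_eq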
Law of Y conditioned on the event s.
Equations
- One or more equations did not get rendered due to their size.
Instances For
Law of Y conditioned on the event that X is in s.
Equations
- One or more equations did not get rendered due to their size.
Instances For
Law of Y conditioned on the event that X equals x.
Equations
- One or more equations did not get rendered due to their size.
Instances For
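The definition headers above were not rendered; judging from the docstrings, they are variants of a conditional-law construction. A hedged sketch of the first one, under a hypothetical name condLawOn (the actual names and signatures in the library may differ), built from Mathlib's ProbabilityTheory.cond:

-- Hypothetical sketch only: the law of Y conditioned on an event s, as the
-- pushforward of the conditional measure P[|s] under Y. The library's actual
-- definition may differ.
noncomputable def condLawOn {Ω E : Type*} [MeasurableSpace Ω] [MeasurableSpace E]
    (Y : Ω → E) (P : MeasureTheory.Measure Ω) (s : Set Ω) :
    MeasureTheory.Measure E :=
  MeasureTheory.Measure.map Y (ProbabilityTheory.cond P s)

The other two variants presumably specialize s to a preimage X ⁻¹' s and to X ⁻¹' {x} respectively.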
theorem
Bandits.condDistrib_reward''
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(n : ℕ)
:
theorem
Bandits.reward_cond_action
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(n : ℕ)
(hμa : (MeasureTheory.Measure.map (fun (ω : Ω × (ℕ → α → ℝ)) => A n ω.1) (P.prod (Bandit.streamMeasure ν))) {a} ≠ 0)
:
theorem
Bandits.condIndepFun_reward_stepsUntil_action'
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(m n : ℕ)
:
ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (A n) inferInstance) ⋯ (R n)
({ω : Ω | Learning.stepsUntil A a m ω = ↑n}.indicator fun (x : Ω) => 1) P
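Informally (a paraphrase of the statement above, not text from the library): conditionally on the σ-algebra generated by the action at time n, the reward at time n is independent of the indicator of the event that the m-th pull of arm a happens at time n:

$$ R_n \;\perp\!\!\!\perp\; \mathbf{1}_{\{\mathrm{stepsUntil}\,A\,a\,m \,=\, n\}} \;\Big|\; \sigma(A_n) \quad \text{under } P. $$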
theorem
Bandits.condIndepFun_reward_stepsUntil_action
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(m n : ℕ)
:
theorem
Bandits.reward_cond_stepsUntil
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(m n : ℕ)
(hm : m ≠ 0)
(hμn : (P.prod (Bandit.streamMeasure ν)) ((fun (ω : Ω × (ℕ → α → ℝ)) => Learning.stepsUntil A a m ω.1) ⁻¹' {↑n}) ≠ 0)
:
theorem
Bandits.condDistrib_rewardByCount_stepsUntil
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(m : ℕ)
(hm : m ≠ 0)
:
⇑𝓛[Learning.rewardByCount A R a m | fun (ω : Ω × (ℕ → α → ℝ)) => Learning.stepsUntil A a m ω.1;
    P.prod (Bandit.streamMeasure ν)]
  =ᵐ[MeasureTheory.Measure.map (fun (ω : Ω × (ℕ → α → ℝ)) => Learning.stepsUntil A a m ω.1)
      (P.prod (Bandit.streamMeasure ν))]
  ⇑(ProbabilityTheory.Kernel.const ℕ∞ (ν a))
The conditional distribution of the reward received at the m-th pull of action a,
given the time at which the number of pulls of a reaches m, is the constant kernel with value ν a.
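Spelled out (a paraphrase): writing τ_{a,m} for stepsUntil A a m, the time of the m-th pull of arm a, the statement says that for law(τ_{a,m})-almost every t ∈ ℕ∞,

$$ \mathcal{L}\big(\mathrm{rewardByCount}\,A\,R\,a\,m \;\big|\; \tau_{a,m} = t\big) \;=\; \nu(a), $$

so the conditional distribution does not depend on t. This is the key step for the unconditional law in the next theorem.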
theorem
Bandits.hasLaw_rewardByCount
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(m : ℕ)
(hm : m ≠ 0)
:
ProbabilityTheory.HasLaw (Learning.rewardByCount A R a m) (ν a) (P.prod (Bandit.streamMeasure ν))
The reward received at the m-th pull of action a has law ν a.
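As with hasLaw_Z above, a minimal usage sketch, again assuming the map_eq field of Mathlib's ProbabilityTheory.HasLaw:

example {α Ω : Type*} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω}
    [DecidableEq α] [StandardBorelSpace α] [Nonempty α] [Countable α] [StandardBorelSpace Ω]
    {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
    [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
    (a : α) (m : ℕ) (hm : m ≠ 0) :
    -- The pushforward of the product measure under the m-th-pull reward is ν a.
    MeasureTheory.Measure.map (Learning.rewardByCount A R a m)
      (P.prod (Bandit.streamMeasure ν)) = ν a :=
  (Bandits.hasLaw_rewardByCount h a m hm).map_eq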
theorem
Bandits.identDistrib_rewardByCount
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(n m : ℕ)
(hn : n ≠ 0)
(hm : m ≠ 0)
:
ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (Learning.rewardByCount A R a m)
(P.prod (Bandit.streamMeasure ν)) (P.prod (Bandit.streamMeasure ν))
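A small usage sketch, assuming IdentDistrib.symm from Mathlib: the roles of the two pull counts can be swapped, so only one ordering of the lemma is needed in practice.

example {α Ω : Type*} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω}
    [DecidableEq α] [StandardBorelSpace α] [Nonempty α] [Countable α] [StandardBorelSpace Ω]
    {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
    [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
    (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
    -- Swap the two pull counts via symmetry of IdentDistrib.
    ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a m)
      (Learning.rewardByCount A R a n)
      (P.prod (Bandit.streamMeasure ν)) (P.prod (Bandit.streamMeasure ν)) :=
  (Bandits.identDistrib_rewardByCount h a n m hn hm).symm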
theorem
Bandits.identDistrib_rewardByCount_id
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(n : ℕ)
(hn : n ≠ 0)
:
ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) id (P.prod (Bandit.streamMeasure ν)) (ν a)
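One concrete consequence, as a sketch assuming IdentDistrib.measure_mem_eq from Mathlib: probabilities of measurable reward events at the n-th pull can be computed directly from ν a.

example {α Ω : Type*} {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω}
    [DecidableEq α] [StandardBorelSpace α] [Nonempty α] [Countable α] [StandardBorelSpace Ω]
    {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
    [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν]
    (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
    (a : α) (n : ℕ) (hn : n ≠ 0) {s : Set ℝ} (hs : MeasurableSet s) :
    -- Identifying rewardByCount with id on (ℝ, ν a) turns event probabilities
    -- under the product measure into ν a applied to the event itself.
    (P.prod (Bandit.streamMeasure ν)) (Learning.rewardByCount A R a n ⁻¹' s) = (ν a) s := by
  simpa using (Bandits.identDistrib_rewardByCount_id h a n hn).measure_mem_eq hs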
theorem
Bandits.identDistrib_rewardByCount_eval
{α : Type u_1}
{Ω : Type u_2}
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
[DecidableEq α]
[StandardBorelSpace α]
[Nonempty α]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[StandardBorelSpace Ω]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(n m : ℕ)
(hn : n ≠ 0)
:
ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (fun (ω : ℕ → α → ℝ) => ω m a)
(P.prod (Bandit.streamMeasure ν)) (Bandit.streamMeasure ν)
theorem
Bandits.identDistrib_eval_streamMeasure_measure
{α : Type u_1}
{mα : MeasurableSpace α}
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
:
ProbabilityTheory.IdentDistrib (fun (ω : ℕ → α → ℝ) (n : ℕ) => ω n a)
(fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => ω.2 n a) (Bandit.streamMeasure ν) (Bandit.measure alg ν)
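A closing sketch, assuming the map_eq field of Mathlib's ProbabilityTheory.IdentDistrib: this identical distribution transfers the law of arm a's reward stream from the stream measure alone to the full bandit measure.

example {α : Type*} {mα : MeasurableSpace α}
    {alg : Learning.Algorithm α ℝ}
    {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) :
    -- The reward stream of arm a has the same law under both measures.
    MeasureTheory.Measure.map (fun (ω : ℕ → α → ℝ) (n : ℕ) => ω n a)
        (Bandit.streamMeasure ν) =
      MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) (n : ℕ) => ω.2 n a)
        (Bandit.measure alg ν) :=
  (Bandits.identDistrib_eval_streamMeasure_measure a).map_eq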