
LeanBandits.RewardByCountMeasure

Laws of stepsUntil and rewardByCount
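
All statements on this page live on the sample space of Bandit.measure alg ν, whose outcomes pair a trajectory of (arm, reward) steps with a table of per-arm rewards. As a hedged informal reading of the signatures below (the definitions of pullCount, stepsUntil and rewardByCount are not restated on this page):

$$ \omega = (\omega.1, \omega.2) \in (\mathbb{N} \to \alpha \times \mathbb{R}) \times (\mathbb{N} \to \alpha \to \mathbb{R}), \qquad \omega \sim \mathrm{Bandit.measure}\ \mathit{alg}\ \nu, $$

where ω.1 n is the (arm, reward) pair observed at step n, pullCount a t ω.1 counts the pulls of arm a among the first t steps, stepsUntil a m ω.1 : ℕ∞ is the step at which arm a is pulled for the m-th time, and rewardByCount a m ω.1 ω.2 is the reward obtained at that m-th pull.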

theorem Bandits.measurable_pullCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (t : ℕ) :
Measurable fun (h : ℕ → α × ℝ) => pullCount a t h
theorem Bandits.measurable_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (h : ℕ → α × ℝ) => stepsUntil a m h
theorem Bandits.measurable_stepsUntil' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1
theorem Bandits.measurable_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] (a : α) (m : ℕ) :
Measurable fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2
theorem Bandits.hasLaw_Z {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => ω.2 m a) (ν a) (Bandit.measure alg ν)
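
A hedged unfolding of hasLaw_Z (assuming HasLaw X μ P records that the pushforward of P under X is μ): every entry of the reward table has law ν a, i.e.

$$ \big(\mathrm{Bandit.measure}\ \mathit{alg}\ \nu\big).\mathrm{map}\,\big(\omega \mapsto \omega.2\ m\ a\big) = \nu\ a. $$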

Law of Y conditioned on the event s.

Equations
  • One or more equations did not get rendered due to their size.
Instances For

Law of Y conditioned on the event that X is in s.

Equations
  • One or more equations did not get rendered due to their size.
Instances For

Law of Y conditioned on the event that X equals x.

Equations
  • One or more equations did not get rendered due to their size.
Instances For

Law of Y conditioned on X.

Equations
  • One or more equations did not get rendered due to their size.
Instances For

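The four definitions above provide the 𝓛[⋅ | ⋅; μ] and 𝓛[⋅ | ⋅ in s; μ] notation used in the theorems below: the law of Y conditioned on a random variable X (a kernel) and the law of Y conditioned on the event that X lies in s (a measure). Since the equations are not rendered here, the following is only a plausible unfolding of the event form, for a measurable set B:

$$ \mathcal{L}[Y \mid X \in s;\ \mu]\,(B) \;=\; \frac{\mu\big(\{Y \in B\} \cap \{X \in s\}\big)}{\mu(X \in s)}. $$
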
theorem Bandits.condDistrib_reward'' {α : Type u_1} {mα : MeasurableSpace α} {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] (n : ℕ) :
𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1; Bandit.measure alg ν] =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) (Bandit.measure alg ν)] ν
theorem Bandits.reward_cond_arm {α : Type u_1} {mα : MeasurableSpace α} [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Nonempty α] [Countable α] (a : α) (n : ℕ) (hμa : (MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) (Bandit.measure alg ν)) {a} ≠ 0) :
𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1 in {a}; Bandit.measure alg ν] = ν a
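
Hedged reading of condDistrib_reward'' and reward_cond_arm together: conditionally on the arm pulled at step n, the reward at step n is distributed according to ν at that arm. In particular, whenever the event {arm n = a} has positive probability under Bandit.measure alg ν,

$$ \mathcal{L}\big(\mathrm{reward}\ n \,\big|\, \mathrm{arm}\ n = a\big) \;=\; \nu\ a. $$
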
theorem Bandits.condIndepFun_reward_stepsUntil_arm {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) :
ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => arm n ω.1) mα) ⋯ (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1) ({ω : (ℕ → α × ℝ) × (ℕ → α → ℝ) | stepsUntil a m ω.1 = ↑n}.indicator fun (x : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => 1) (Bandit.measure alg ν)
theorem Bandits.reward_cond_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace α] [Countable α] [Nonempty α] (a : α) (m n : ℕ) (hm : m ≠ 0) (hμn : (Bandit.measure alg ν) ((fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) ⁻¹' {↑n}) ≠ 0) :
𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => reward n ω.1 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1 in {↑n}; Bandit.measure alg ν] = ν a
theorem Bandits.condDistrib_rewardByCount_stepsUntil {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
𝓛[fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2 | fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1; Bandit.measure alg ν] =ᵐ[MeasureTheory.Measure.map (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => stepsUntil a m ω.1) (Bandit.measure alg ν)] (ProbabilityTheory.Kernel.const ℕ∞ (ν a))
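
Hedged reading of the two previous statements: conditioning on when the m-th pull of arm a happens does not change the law of the reward collected at that pull. In reward_cond_stepsUntil this is conditioning on the event {stepsUntil a m = n}; in condDistrib_rewardByCount_stepsUntil it is the kernel form, where the conditional law is the constant kernel on ℕ∞ with value ν a:

$$ \mathcal{L}\big(\mathrm{rewardByCount}\ a\ m \,\big|\, \mathrm{stepsUntil}\ a\ m\big)(t) \;=\; \nu\ a \quad \text{for a.e. } t \text{ under the law of } \mathrm{stepsUntil}\ a\ m. $$
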
theorem Bandits.hasLaw_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (m : ℕ) (hm : m ≠ 0) :
ProbabilityTheory.HasLaw (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2) (ν a) (Bandit.measure alg ν)

The reward received at the m-th pull of arm a has law ν a.
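
In pushforward form (same hedged reading of HasLaw as above), for every m ≠ 0:

$$ \big(\mathrm{Bandit.measure}\ \mathit{alg}\ \nu\big).\mathrm{map}\,\big(\omega \mapsto \mathrm{rewardByCount}\ a\ m\ \omega.1\ \omega.2\big) \;=\; \nu\ a. $$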

theorem Bandits.identDistrib_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a m ω.1 ω.2) (Bandit.measure alg ν) (Bandit.measure alg ν)
theorem Bandits.identDistrib_rewardByCount_id {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n : ℕ) (hn : n ≠ 0) :
ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) id (Bandit.measure alg ν) (ν a)
theorem Bandits.identDistrib_rewardByCount_eval {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [MeasurableSingletonClass α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Countable α] [StandardBorelSpace α] [Nonempty α] (a : α) (n m : ℕ) (hn : n ≠ 0) :
ProbabilityTheory.IdentDistrib (fun (ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)) => rewardByCount a n ω.1 ω.2) (fun (ω : ℕ → α → ℝ) => ω m a) (Bandit.measure alg ν) (Bandit.streamMeasure ν)
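
Hedged summary of the three IdentDistrib statements: for a fixed arm a and nonzero counts n and m, the rewards-by-count are identically distributed with common law ν a, and each is distributed like the corresponding entry of the reward table under Bandit.streamMeasure ν:

$$ \mathrm{rewardByCount}\ a\ n \;\overset{d}{=}\; \mathrm{rewardByCount}\ a\ m \;\overset{d}{=}\; \big(\omega \mapsto \omega\ m\ a\big), \qquad \text{each with law } \nu\ a. $$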