
LeanBandits.Bandit.SumRewards

Law of the sum of rewards #

theorem measurable_sum_range_of_le {α : Type u_1} {mα : MeasurableSpace α} {f : ℕ → α → ℝ} {g : α → ℕ} {n : ℕ} (hg_le : ∀ (a : α), g a ≤ n) (hf : ∀ (i : ℕ), Measurable (f i)) (hg : Measurable g) :
Measurable fun (a : α) => ∑ i ∈ Finset.range (g a), f i a
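
A hypothetical usage sketch (not part of the file): with a measurable index g bounded by a fixed constant, the lemma discharges measurability of the variable-length sum directly.

example {α : Type u_1} {mα : MeasurableSpace α} {f : ℕ → α → ℝ} {g : α → ℕ}
    (hf : ∀ i, Measurable (f i)) (hg : Measurable g) (hg_le : ∀ a, g a ≤ 10) :
    Measurable fun a => ∑ i ∈ Finset.range (g a), f i a :=
  -- the bound 10 plays the role of `n`; each summand and the index are measurable
  measurable_sum_range_of_le hg_le hf hg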
theorem measurable_sum_Icc_of_le {α : Type u_1} {mα : MeasurableSpace α} {f : ℕ → α → ℝ} {g : α → ℕ} {n : ℕ} (hg_le : ∀ (a : α), g a ≤ n) (hf : ∀ (i : ℕ), Measurable (f i)) (hg : Measurable g) :
Measurable fun (a : α) => ∑ i ∈ Finset.Icc 1 (g a), f i a
theorem Bandits.ArrayModel.identDistrib_sumRewards {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) (a : α) => Learning.sumRewards (action alg) (reward alg) a n ω) (fun (ω : probSpace α ℝ) (a : α) => ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a) (arrayMeasure ν) (arrayMeasure ν)
theorem Bandits.ArrayModel.identDistrib_sum_range_snd {α : Type u_1} {mα : MeasurableSpace α} [Countable α] {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (k : ℕ) :
ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) => ∑ i ∈ Finset.range k, ω.2 i a) (fun (ω : ℕ → α → ℝ) => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν) (Bandit.streamMeasure ν)
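
Informally, writing $X_{i,a}$ for the reward in cell $(i, a)$ of the array (the second component of ω) or of the stream, the statement says that the partial sums of a fixed arm's column have the same law under the two measures:

$$\mathcal{L}\Big(\sum_{i<k} X_{i,a}\,;\ \texttt{arrayMeasure}\ \nu\Big) \;=\; \mathcal{L}\Big(\sum_{i<k} X_{i,a}\,;\ \texttt{streamMeasure}\ \nu\Big).$$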
theorem Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) {s : Set ℕ} [DecidablePred fun (x : ℕ) => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
(arrayMeasure ν) {ω : probSpace α ℝ | Learning.pullCount (action alg) a n ω ∈ s ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
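
Writing $T_a(n)$ for the pull count and $S_a(n)$ for the sum of rewards of arm $a$ after $n$ rounds, $\mathbb{P}$ for the array measure and $\mathbb{P}'$ for the stream measure, the bound splits the event according to the value of the pull count:

$$\mathbb{P}\big(T_a(n) \in s \wedge S_a(n) \in B\big) \;\le\; \sum_{k \le n,\ k \in s} \mathbb{P}'\Big(\sum_{i<k} X_{i,a} \in B\Big).$$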
theorem Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype α] (a : α) (n m₁ m₂ : ℕ) :
(arrayMeasure ν) {ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
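
With $a^\star = \texttt{bestArm}\ \nu$ and the same notation, this is the comparison event that drives regret analyses:

$$\mathbb{P}\big(T_{a^\star}(n) = m_1 \wedge T_a(n) = m_2 \wedge S_{a^\star}(n) \le S_a(n)\big) \;\le\; \mathbb{P}'\Big(\sum_{i<m_1} X_{i,a^\star} \le \sum_{i<m_2} X_{i,a}\Big).$$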
theorem Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype α] (a : α) (n m₁ m₂ : ℕ) :
(arrayMeasure ν).real {ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (Bandit.streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
theorem Bandits.sumRewards_eq_comp {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} :
Learning.sumRewards A R a n = (fun (p : ℕ → α × ℝ) => ∑ i ∈ Finset.range n, if (p i).1 = a then (p i).2 else 0) ∘ fun (ω : Ω) (n : ℕ) => (A n ω, R n ω)
theorem Bandits.pullCount_eq_comp {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} :
Learning.pullCount A a n = (fun (p : ℕ → α × ℝ) => ∑ i ∈ Finset.range n, if (p i).1 = a then 1 else 0) ∘ fun (ω : Ω) (n : ℕ) => (A n ω, R n ω)
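
Both lemmas exhibit the statistic as a fixed measurable functional of the trajectory fun ω n => (A n ω, R n ω); the uniqueness-of-law results below rest on this. A hypothetical pointwise consequence (a sketch, assuming the rewrite closes by definitional unfolding):

example {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {A : ℕ → Ω → α}
    {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} (ω : Ω) :
    Learning.sumRewards A R a n ω
      = ∑ i ∈ Finset.range n, if A i ω = a then R i ω else 0 := by
  -- unfold the composition from `Bandits.sumRewards_eq_comp` at the point `ω`
  rw [Bandits.sumRewards_eq_comp]; rfl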
theorem Learning.IsAlgEnvSeq.law_sumRewards_unique {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'
theorem Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
ProbabilityTheory.IdentDistrib (fun (ω : Ω) (a : α) => (pullCount A a n ω, sumRewards A R a n ω)) (fun (ω : Ω') (a : α) => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
theorem Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
MeasureTheory.Measure.map (fun (ω : Ω) => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun (ω : Ω') => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'
theorem Bandits.prob_pullCount_prod_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set (ℕ × ℝ)} [DecidablePred fun (x : ℕ) => x ∈ Prod.fst '' s] (hs : MeasurableSet s) :
P {ω : Ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}
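
In symbols, with $\pi_1$ the first projection and $s_k = \{x \mid (k, x) \in s\}$ the slice of $s$ at pull count $k$, the joint event is bounded slice by slice:

$$\mathbb{P}\big((T_a(n), S_a(n)) \in s\big) \;\le\; \sum_{k \le n,\ k \in \pi_1(s)} \mathbb{P}'\Big(\sum_{i<k} X_{i,a} \in s_k\Big).$$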
theorem Bandits.prob_pullCount_mem_and_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ℕ} [DecidablePred fun (x : ℕ) => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
P {ω : Ω | Learning.pullCount A a n ω ∈ s ∧ Learning.sumRewards A R a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
theorem Bandits.prob_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set ℝ} (hB : MeasurableSet B) :
P (Learning.sumRewards A R a n ⁻¹' B) ≤ ∑ k ∈ Finset.range (n + 1), (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
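
Dropping the pull-count constraint (the case $s = \mathbb{N}$ of the bounds above) leaves a plain union bound over all possible pull counts:

$$\mathbb{P}\big(S_a(n) \in B\big) \;\le\; \sum_{k=0}^{n} \mathbb{P}'\Big(\sum_{i<k} X_{i,a} \in B\Big).$$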
theorem Bandits.prob_pullCount_eq_and_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :
P {ω : Ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m, ω i a ∈ B}
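
When the pull count is pinned to a single value $m \le n$, only one term of that sum survives:

$$\mathbb{P}\big(T_a(n) = m \wedge S_a(n) \in B\big) \;\le\; \mathbb{P}'\Big(\sum_{i<m} X_{i,a} \in B\Big).$$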
theorem Bandits.probReal_sumRewards_le_sumRewards_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} [StandardBorelSpace α] [Nonempty α] [Fintype α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : α) (n m₁ m₂ : ℕ) :
P.real {ω : Ω | Learning.pullCount A (bestArm ν) n ω = m₁ ∧ Learning.pullCount A a n ω = m₂ ∧ Learning.sumRewards A R (bestArm ν) n ω ≤ Learning.sumRewards A R a n ω} ≤ (Bandit.streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
theorem Bandits.probReal_sum_le_sum_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Nonempty α] [Fintype α] (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a)) (a : α) (m : ℕ) :
(Bandit.streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ s ∈ Finset.range m, ω s (bestArm ν) ≤ ∑ s ∈ Finset.range m, ω s a} ≤ Real.exp (-m * gap ν a ^ 2 / 4)
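
The constant $4$ comes from the standard subgaussian computation (assuming, as the construction suggests, that the stream cells are independent across arms): each difference $X_{s,a} - X_{s,a^\star} + \Delta_a$ is centered and $2$-subgaussian, where $\Delta_a = \texttt{gap}\ \nu\ a$, so the sum over $m$ rounds is $2m$-subgaussian and

$$\mathbb{P}'\Big(\sum_{s<m} X_{s,a^\star} \le \sum_{s<m} X_{s,a}\Big) \;\le\; \exp\Big(-\frac{(m\Delta_a)^2}{2\cdot 2m}\Big) \;=\; \exp\Big(-\frac{m\,\Delta_a^2}{4}\Big).$$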
theorem Bandits.prob_sum_le_sqrt_log {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a)) {c : ℝ} (hc : 0 ≤ c) (a : α) (k : ℕ) (hk : k ≠ 0) :
(Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(c * k * Real.log (n + 1))} ≤ 1 / (n + 1) ^ (c / 2)
theorem Bandits.prob_sum_ge_sqrt_log {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a)) {c : ℝ} (hc : 0 ≤ c) (a : α) (k : ℕ) (hk : k ≠ 0) :
(Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | √(c * k * Real.log (n + 1)) ≤ ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a)} ≤ 1 / (n + 1) ^ (c / 2)
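
Both one-sided tails come from the same computation: the centered sum of $k$ independent $1$-subgaussian rewards is $k$-subgaussian, so with $\mu_a = \int x \,\mathrm{d}(\nu\,a)$ and threshold $\sqrt{c\,k\,\log(n+1)}$,

$$\mathbb{P}'\Big(\sum_{s<k}\big(X_{s,a} - \mu_a\big) \ge \sqrt{c\,k\,\log(n+1)}\Big) \;\le\; \exp\Big(-\frac{c\,k\,\log(n+1)}{2k}\Big) \;=\; \frac{1}{(n+1)^{c/2}},$$

and symmetrically for the lower tail.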