Law of the sum of rewards #

theorem Bandits.ArrayModel.sum_Icc_one_eq_sum_range {m : ℕ} {f : ℕ → ℝ} :

∑ i ∈ Finset.Icc 1 m, f (i - 1) = ∑ i ∈ Finset.range m, f i

theorem Bandits.ArrayModel.identDistrib_pullCount_prod_sum_Icc_rewardByCount' {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ × (ℕ → α → ℝ)) (a : α) => (Learning.pullCount (action alg) a n ω.1, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω.1), Learning.rewardByCount (action alg) (reward alg) a i ω)) (fun (ω : probSpace α ℝ) (a : α) => (Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω), ω.2 (i - 1) a)) ((arrayMeasure ν).prod (streamMeasure ν)) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_pullCount_prod_sum_Icc_rewardByCount {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ × (ℕ → α → ℝ)) (a : α) => (Learning.pullCount (action alg) a n ω.1, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω.1), Learning.rewardByCount (action alg) (reward alg) a i ω)) (fun (ω : probSpace α ℝ) (a : α) => (Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a)) ((arrayMeasure ν).prod (streamMeasure ν)) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) (a : α) => (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω)) (fun (ω : probSpace α ℝ) (a : α) => (Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a)) (arrayMeasure ν) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards_arm {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) => (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω)) (fun (ω : probSpace α ℝ) => (Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a)) (arrayMeasure ν) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards_two_arms {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a b : α) (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) => (Learning.pullCount (action alg) a n ω, Learning.pullCount (action alg) b n ω, Learning.sumRewards (action alg) (reward alg) a n ω, Learning.sumRewards (action alg) (reward alg) b n ω)) (fun (ω : probSpace α ℝ) => (Learning.pullCount (action alg) a n ω, Learning.pullCount (action alg) b n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a, ∑ i ∈ Finset.range (Learning.pullCount (action alg) b n ω), ω.2 i b)) (arrayMeasure ν) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_sumRewards {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) (a : α) => Learning.sumRewards (action alg) (reward alg) a n ω) (fun (ω : probSpace α ℝ) (a : α) => ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a) (arrayMeasure ν) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_sumRewards_arm {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) :

ProbabilityTheory.IdentDistrib (Learning.sumRewards (action alg) (reward alg) a n) (fun (ω : probSpace α ℝ) => ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a) (arrayMeasure ν) (arrayMeasure ν)

source

theorem Bandits.ArrayModel.identDistrib_sum_range_snd {α : Type u_1} {mα : MeasurableSpace α} [Countable α] {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (k : ℕ) :

ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) => ∑ i ∈ Finset.range k, ω.2 i a) (fun (ω : ℕ → α → ℝ) => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν) (streamMeasure ν)

source

theorem Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) {s : Set (ℕ × ℝ)} [DecidablePred fun (x : ℕ) => x ∈ Prod.fst '' s] (hs : MeasurableSet s) :

(arrayMeasure ν) {ω : probSpace α ℝ | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}

source

theorem Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n : ℕ) {s : Set ℕ} [DecidablePred fun (x : ℕ) => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :

(arrayMeasure ν) {ω : probSpace α ℝ | Learning.pullCount (action alg) a n ω ∈ s ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}

source

theorem Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype α] (a : α) (n m₁ m₂ : ℕ) :

(arrayMeasure ν) {ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}

source

theorem Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le {α : Type u_1} {mα : MeasurableSpace α} [DecidableEq α] [Countable α] [StandardBorelSpace α] [Nonempty α] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype α] (a : α) (n m₁ m₂ : ℕ) :

(arrayMeasure ν).real {ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}

source

theorem Bandits.sumRewards_eq_comp {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} :

Learning.sumRewards A R a n = (fun (p : ℕ → α × ℝ) => ∑ i ∈ Finset.range n, if (p i).1 = a then (p i).2 else 0) ∘ fun (ω : Ω) (n : ℕ) => (A n ω, R n ω)

source

theorem Bandits.pullCount_eq_comp {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} :

Learning.pullCount A a n = (fun (p : ℕ → α × ℝ) => ∑ i ∈ Finset.range n, if (p i).1 = a then 1 else 0) ∘ fun (ω : Ω) (n : ℕ) => (A n ω, R n ω)

source

theorem Learning.IsAlgEnvSeq.law_sumRewards_unique {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :

MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'

source

theorem Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :

ProbabilityTheory.IdentDistrib (fun (ω : Ω) (a : α) => (pullCount A a n ω, sumRewards A R a n ω)) (fun (ω : Ω') (a : α) => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'

source

theorem Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique {α : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → α} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :

MeasureTheory.Measure.map (fun (ω : Ω) => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun (ω : Ω') => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'

source

theorem Bandits.prob_pullCount_prod_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set (ℕ × ℝ)} [DecidablePred fun (x : ℕ) => x ∈ Prod.fst '' s] (hs : MeasurableSet s) :

P {ω : Ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}

source

theorem Bandits.prob_pullCount_mem_and_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ℕ} [DecidablePred fun (x : ℕ) => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :

P {ω : Ω | Learning.pullCount A a n ω ∈ s ∧ Learning.sumRewards A R a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}

source

theorem Bandits.prob_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set ℝ} (hB : MeasurableSet B) :

P (Learning.sumRewards A R a n ⁻¹' B) ≤ ∑ k ∈ Finset.range (n + 1), (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}

source

theorem Bandits.prob_pullCount_eq_and_sumRewards_mem_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n : ℕ} {a : α} [StandardBorelSpace α] [Nonempty α] [Countable α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :

P {ω : Ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m, ω i a ∈ B}

source

theorem Bandits.probReal_sumRewards_le_sumRewards_le {α : Type u_1} {Ω : Type u_2} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} [StandardBorelSpace α] [Nonempty α] [Fintype α] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : α) (n m₁ m₂ : ℕ) :

P.real {ω : Ω | Learning.pullCount A (bestArm ν) n ω = m₁ ∧ Learning.pullCount A a n ω = m₂ ∧ Learning.sumRewards A R (bestArm ν) n ω ≤ Learning.sumRewards A R a n ω} ≤ (streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}

source

theorem Bandits.probReal_sum_le_sum_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Nonempty α] [Fintype α] {c : NNReal} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) c (ν a)) (a : α) (m : ℕ) :

(streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ s ∈ Finset.range m, ω s (bestArm ν) ≤ ∑ s ∈ Finset.range m, ω s a} ≤ Real.exp (-↑m * gap ν a ^ 2 / (4 * ↑c))

source

theorem Bandits.prob_sum_le_sqrt_log {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : α) (k : ℕ) (hk : k ≠ 0) :

(streamMeasure ν) {ω : ℕ → α → ℝ | ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1))} ≤ 1 / (↑n + 1) ^ c

source

theorem Bandits.prob_sum_ge_sqrt_log {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : α) (k : ℕ) (hk : k ≠ 0) :

(streamMeasure ν) {ω : ℕ → α → ℝ | √(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1)) ≤ ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a)} ≤ 1 / (↑n + 1) ^ c

source

theorem Bandits.todo {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : α) (n k : ℕ) (hk : k ≠ 0) :

(streamMeasure ν) {ω : ℕ → α → ℝ | (∑ m ∈ Finset.range k, ω m a) / ↑k + √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k) ≤ ∫ (x : ℝ), id x ∂ν a} ≤ 1 / (↑n + 1) ^ c

source

theorem Bandits.todo' {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : α) (n k : ℕ) (hk : k ≠ 0) :

(streamMeasure ν) {ω : ℕ → α → ℝ | ∫ (x : ℝ), id x ∂ν a ≤ (∑ m ∈ Finset.range k, ω m a) / ↑k - √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k)} ≤ 1 / (↑n + 1) ^ c

Documentation

LeanBandits.Bandit.SumRewards

Law of the sum of rewards #