Law of the sum of rewards #
theorem
measurable_sum_range_of_le
{α : Type u_1}
{mα : MeasurableSpace α}
{f : ℕ → α → ℝ}
{g : α → ℕ}
{n : ℕ}
(hg_le : ∀ (a : α), g a ≤ n)
(hf : ∀ (i : ℕ), Measurable (f i))
(hg : Measurable g)
:
Measurable fun (a : α) => ∑ i ∈ Finset.range (g a), f i a
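A minimal usage sketch (hypothetical, not part of the source): specializing `g` to a constant function recovers measurability of a sum over a fixed range.

```lean
-- Hypothetical sketch: with `g` constant equal to `n`, the hypothesis
-- `hg_le` holds by reflexivity and `hg` is `measurable_const`.
example {α : Type*} {mα : MeasurableSpace α} {f : ℕ → α → ℝ} {n : ℕ}
    (hf : ∀ i, Measurable (f i)) :
    Measurable fun a : α => ∑ i ∈ Finset.range n, f i a :=
  measurable_sum_range_of_le (g := fun _ => n) (fun _ => le_rfl) hf measurable_const
```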
theorem
measurable_sum_Icc_of_le
{α : Type u_1}
{mα : MeasurableSpace α}
{f : ℕ → α → ℝ}
{g : α → ℕ}
{n : ℕ}
(hg_le : ∀ (a : α), g a ≤ n)
(hf : ∀ (i : ℕ), Measurable (f i))
(hg : Measurable g)
:
Measurable fun (a : α) => ∑ i ∈ Finset.Icc 1 (g a), f i a
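`measurable_sum_Icc_of_le` is the `Finset.Icc 1 (g a)` analogue of the preceding lemma; together they provide the measurability inputs for the distributional identities below.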
theorem
Bandits.ArrayModel.identDistrib_pullCount_prod_sum_Icc_rewardByCount'
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.IdentDistrib
(fun (ω : probSpace α ℝ × (ℕ → α → ℝ)) (a : α) =>
(Learning.pullCount (action alg) a n ω.1, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω.1),
Learning.rewardByCount (action alg) (reward alg) a i ω))
(fun (ω : probSpace α ℝ) (a : α) =>
(Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω), ω.2 (i - 1) a))
((arrayMeasure ν).prod (Bandit.streamMeasure ν)) (arrayMeasure ν)
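In words: under the product of `arrayMeasure ν` with an independent reward stream, the pair formed by the pull count of arm `a` at time `n` and the cumulative reward of `a` collected pull-by-pull (via `rewardByCount`) has the same law as in the array model alone, where `ω.2 (i - 1) a` plays the role of the pre-sampled reward of the `i`-th pull of `a`.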
theorem
Bandits.ArrayModel.identDistrib_pullCount_prod_sum_Icc_rewardByCount
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.IdentDistrib
(fun (ω : probSpace α ℝ × (ℕ → α → ℝ)) (a : α) =>
(Learning.pullCount (action alg) a n ω.1, ∑ i ∈ Finset.Icc 1 (Learning.pullCount (action alg) a n ω.1),
Learning.rewardByCount (action alg) (reward alg) a i ω))
(fun (ω : probSpace α ℝ) (a : α) =>
(Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a))
((arrayMeasure ν).prod (Bandit.streamMeasure ν)) (arrayMeasure ν)
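The unprimed version is the same identity after reindexing the array-side sum from `Finset.Icc 1 k` with rewards `ω.2 (i - 1) a` to `Finset.range k` with rewards `ω.2 i a`.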
theorem
Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.IdentDistrib
(fun (ω : probSpace α ℝ) (a : α) =>
(Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω))
(fun (ω : probSpace α ℝ) (a : α) =>
(Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a))
(arrayMeasure ν) (arrayMeasure ν)
theorem
Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards_arm
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(n : ℕ)
:
ProbabilityTheory.IdentDistrib
(fun (ω : probSpace α ℝ) =>
(Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω))
(fun (ω : probSpace α ℝ) =>
(Learning.pullCount (action alg) a n ω, ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a))
(arrayMeasure ν) (arrayMeasure ν)
theorem
Bandits.ArrayModel.identDistrib_pullCount_prod_sumRewards_two_arms
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a b : α)
(n : ℕ)
:
ProbabilityTheory.IdentDistrib
(fun (ω : probSpace α ℝ) =>
(Learning.pullCount (action alg) a n ω, Learning.pullCount (action alg) b n ω,
  Learning.sumRewards (action alg) (reward alg) a n ω, Learning.sumRewards (action alg) (reward alg) b n ω))
(fun (ω : probSpace α ℝ) =>
(Learning.pullCount (action alg) a n ω, Learning.pullCount (action alg) b n ω,
  ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a,
  ∑ i ∈ Finset.range (Learning.pullCount (action alg) b n ω), ω.2 i b))
(arrayMeasure ν) (arrayMeasure ν)
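`identDistrib_pullCount_prod_sumRewards` then replaces the pull-indexed sum by `sumRewards` itself, with both laws taken under `arrayMeasure ν`; the `_arm` and `_two_arms` variants specialize this identity to a single fixed arm, respectively to a pair of arms jointly.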
theorem
Bandits.ArrayModel.identDistrib_sumRewards
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) (a : α) => Learning.sumRewards (action alg) (reward alg) a n ω)
(fun (ω : probSpace α ℝ) (a : α) => ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a)
(arrayMeasure ν) (arrayMeasure ν)
theorem
Bandits.ArrayModel.identDistrib_sumRewards_arm
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(n : ℕ)
:
ProbabilityTheory.IdentDistrib (Learning.sumRewards (action alg) (reward alg) a n)
(fun (ω : probSpace α ℝ) => ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω), ω.2 i a) (arrayMeasure ν)
(arrayMeasure ν)
theorem
Bandits.ArrayModel.identDistrib_sum_range_snd
{α : Type u_1}
{mα : MeasurableSpace α}
[Countable α]
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(k : ℕ)
:
ProbabilityTheory.IdentDistrib (fun (ω : probSpace α ℝ) => ∑ i ∈ Finset.range k, ω.2 i a)
(fun (ω : ℕ → α → ℝ) => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν) (Bandit.streamMeasure ν)
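`identDistrib_sum_range_snd` transfers partial sums of the reward array from `arrayMeasure ν` (read through the second component) to `Bandit.streamMeasure ν`: the sum of the first `k` pre-sampled rewards of arm `a` has the same law under both measures. This is what lets the bounds below be stated against the stream measure alone.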
theorem
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(n : ℕ)
{s : Set (ℕ × ℝ)}
[DecidablePred fun (x : ℕ) => x ∈ Prod.fst '' s]
(hs : MeasurableSet s)
:
(arrayMeasure ν)
  {ω : probSpace α ℝ | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) ∈ s}
  ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s,
      (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | (k, ∑ i ∈ Finset.range k, ω i a) ∈ s}
theorem
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
(n : ℕ)
{s : Set ℕ}
[DecidablePred fun (x : ℕ) => x ∈ s]
(hs : MeasurableSet s)
{B : Set ℝ}
(hB : MeasurableSet B)
:
(arrayMeasure ν)
{ω : probSpace α ℝ | Learning.pullCount (action alg) a n ω ∈ s ∧
    Learning.sumRewards (action alg) (reward alg) a n ω ∈ B}
  ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
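In standard notation, writing $N_a(n)$ for the pull count, $S_a(n)$ for the sum of rewards, $P$ for `arrayMeasure ν`, and $Q$ for `Bandit.streamMeasure ν`, the bound reads

$$P\big(N_a(n) \in s,\; S_a(n) \in B\big) \;\le\; \sum_{\substack{0 \le k \le n \\ k \in s}} Q\Big(\sum_{i=0}^{k-1} \omega_i(a) \in B\Big),$$

where $\omega_i(a)$ denotes the $i$-th pre-sampled reward of arm $a$.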
theorem
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[Fintype α]
(a : α)
(n m₁ m₂ : ℕ)
:
(arrayMeasure ν)
{ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧
    Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω}
  ≤ (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
theorem
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le
{α : Type u_1}
{mα : MeasurableSpace α}
[DecidableEq α]
[Countable α]
[StandardBorelSpace α]
[Nonempty α]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[Fintype α]
(a : α)
(n m₁ m₂ : ℕ)
:
(arrayMeasure ν).real
{ω : probSpace α ℝ | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧
    Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω}
  ≤ (Bandit.streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
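Both comparison lemmas bound the probability that, with prescribed pull counts `m₁` for the best arm and `m₂` for arm `a` at time `n`, the best arm's cumulative reward falls below that of `a`, by the stream-measure probability that a sum of `m₁` pre-sampled rewards of `bestArm ν` is at most a sum of `m₂` pre-sampled rewards of `a`; the `probReal` version states the same inequality for the real-valued probabilities.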
theorem
Learning.IsAlgEnvSeq.law_sumRewards_unique
{α : Type u_1}
{Ω : Type u_2}
{Ω' : Type u_3}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{mΩ' : MeasurableSpace Ω'}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{P' : MeasureTheory.Measure Ω'}
[MeasureTheory.IsProbabilityMeasure P']
{alg : Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{A₂ : ℕ → Ω' → α}
{R₂ : ℕ → Ω' → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P')
:
MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'
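In particular, the law of `sumRewards A R a n` depends only on the algorithm `alg` and the reward kernel `ν`, not on the probability space on which the algorithm-environment sequence is realized.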
theorem
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'
{α : Type u_1}
{Ω : Type u_2}
{Ω' : Type u_3}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{mΩ' : MeasurableSpace Ω'}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{P' : MeasureTheory.Measure Ω'}
[MeasureTheory.IsProbabilityMeasure P']
{alg : Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{A₂ : ℕ → Ω' → α}
{R₂ : ℕ → Ω' → ℝ}
{n : ℕ}
[StandardBorelSpace α]
[Nonempty α]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P')
:
ProbabilityTheory.IdentDistrib (fun (ω : Ω) (a : α) => (pullCount A a n ω, sumRewards A R a n ω))
(fun (ω : Ω') (a : α) => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
theorem
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique
{α : Type u_1}
{Ω : Type u_2}
{Ω' : Type u_3}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{mΩ' : MeasurableSpace Ω'}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{P' : MeasureTheory.Measure Ω'}
[MeasureTheory.IsProbabilityMeasure P']
{alg : Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{A₂ : ℕ → Ω' → α}
{R₂ : ℕ → Ω' → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P')
:
MeasureTheory.Measure.map (fun (ω : Ω) => (pullCount A a n ω, sumRewards A R a n ω)) P =
  MeasureTheory.Measure.map (fun (ω : Ω') => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'
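The primed version states the identity jointly in the arm, as an `IdentDistrib` of `α`-indexed families; the unprimed version reads it off at a fixed arm `a` as an equality of pushforward laws of the pair (pull count, sum of rewards).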
theorem
Bandits.prob_pullCount_prod_sumRewards_mem_le
{α : Type u_1}
{Ω : Type u_2}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
{s : Set (ℕ × ℝ)}
[DecidablePred fun (x : ℕ) => x ∈ Prod.fst '' s]
(hs : MeasurableSet s)
:
P {ω : Ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈ s}
  ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s,
      (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | (k, ∑ i ∈ Finset.range k, ω i a) ∈ s}
theorem
Bandits.prob_pullCount_mem_and_sumRewards_mem_le
{α : Type u_1}
{Ω : Type u_2}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
{s : Set ℕ}
[DecidablePred fun (x : ℕ) => x ∈ s]
(hs : MeasurableSet s)
{B : Set ℝ}
(hB : MeasurableSet B)
:
P {ω : Ω | Learning.pullCount A a n ω ∈ s ∧ Learning.sumRewards A R a n ω ∈ B}
  ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
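Through the uniqueness-of-law results above, the `ArrayModel` bounds transfer to an arbitrary probability space: only the hypothesis `IsAlgEnvSeq A R alg (stationaryEnv ν) P` is required, while the right-hand side still refers to the canonical stream measure.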
theorem
Bandits.prob_sumRewards_mem_le
{α : Type u_1}
{Ω : Type u_2}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
{B : Set ℝ}
(hB : MeasurableSet B)
:
P (Learning.sumRewards A R a n ⁻¹' B)
  ≤ ∑ k ∈ Finset.range (n + 1), (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range k, ω i a ∈ B}
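In effect this is the previous bound with the pull-count constraint removed (take `s := Set.univ`), so the filtered sum becomes the full sum over `k ∈ Finset.range (n + 1)`.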
theorem
Bandits.prob_pullCount_eq_and_sumRewards_mem_le
{α : Type u_1}
{Ω : Type u_2}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
{n : ℕ}
{a : α}
[StandardBorelSpace α]
[Nonempty α]
[Countable α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
{m : ℕ}
(hm : m ≤ n)
{B : Set ℝ}
(hB : MeasurableSet B)
:
P {ω : Ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B}
  ≤ (Bandit.streamMeasure ν) {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m, ω i a ∈ B}
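Conversely, pinning the pull count to a single value `m ≤ n` (in effect `s := {m}`) collapses the filtered sum to the single term `k = m`. A hypothetical usage sketch, assuming this file's context; the choice `B := Set.Iic 0` and its measurability are the only ingredients added here:

```lean
-- Hypothetical sketch: bound the probability that arm `a` was pulled exactly
-- `m` times up to time `n` and its cumulative reward is nonpositive.
example {α Ω : Type*} [DecidableEq α] {mα : MeasurableSpace α} {mΩ : MeasurableSpace Ω}
    {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
    {alg : Learning.Algorithm α ℝ} {ν : ProbabilityTheory.Kernel α ℝ}
    [ProbabilityTheory.IsMarkovKernel ν]
    {A : ℕ → Ω → α} {R : ℕ → Ω → ℝ} {n m : ℕ} {a : α}
    [StandardBorelSpace α] [Nonempty α] [Countable α]
    (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (hm : m ≤ n) :
    P {ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ≤ 0}
      ≤ Bandit.streamMeasure ν {ω | ∑ i ∈ Finset.range m, ω i a ≤ 0} :=
  Bandits.prob_pullCount_eq_and_sumRewards_mem_le h hm (B := Set.Iic 0) measurableSet_Iic
```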
theorem
Bandits.probReal_sumRewards_le_sumRewards_le
{α : Type u_1}
{Ω : Type u_2}
[DecidableEq α]
{mα : MeasurableSpace α}
{mΩ : MeasurableSpace Ω}
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{alg : Learning.Algorithm α ℝ}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{A : ℕ → Ω → α}
{R : ℕ → Ω → ℝ}
[StandardBorelSpace α]
[Nonempty α]
[Fintype α]
(h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
(a : α)
(n m₁ m₂ : ℕ)
:
P.real
{ω : Ω | Learning.pullCount A (bestArm ν) n ω = m₁ ∧ Learning.pullCount A a n ω = m₂ ∧
    Learning.sumRewards A R (bestArm ν) n ω ≤ Learning.sumRewards A R a n ω}
  ≤ (Bandit.streamMeasure ν).real {ω : ℕ → α → ℝ | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
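Again by uniqueness of the law, this is the `ArrayModel` comparison bound restated for an arbitrary algorithm-environment sequence; it is the form to which the subgaussian tail estimates below can be applied.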
theorem
Bandits.probReal_sum_le_sum_streamMeasure
{α : Type u_1}
{mα : MeasurableSpace α}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
[Nonempty α]
[Fintype α]
(hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
(a : α)
(m : ℕ)
:
theorem
Bandits.prob_sum_le_sqrt_log
{α : Type u_1}
{mα : MeasurableSpace α}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
{c : ℝ}
(hc : 0 ≤ c)
(a : α)
(k : ℕ)
(hk : k ≠ 0)
:
theorem
Bandits.prob_sum_ge_sqrt_log
{α : Type u_1}
{mα : MeasurableSpace α}
{ν : ProbabilityTheory.Kernel α ℝ}
[ProbabilityTheory.IsMarkovKernel ν]
{n : ℕ}
(hν : ∀ (a : α), ProbabilityTheory.HasSubgaussianMGF (fun (x : ℝ) => x - ∫ (x : ℝ), id x ∂ν a) 1 (ν a))
{c : ℝ}
(hc : 0 ≤ c)
(a : α)
(k : ℕ)
(hk : k ≠ 0)
: