Bandit #
Measure of an infinite stream of rewards from each action.
Equations
- Bandits.streamMeasure ν = MeasureTheory.Measure.infinitePi fun (x : ℕ) => MeasureTheory.Measure.infinitePi ⇑ν
Instances For
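Unfolding the two infinite products: assuming `ν` is a kernel from actions to reward distributions (so `⇑ν a` is the reward law of action `a`), the stream measure is the law of an i.i.d. array of rewards, one row per time step and one column per action:

```latex
% Sketch: streamMeasure as a doubly-indexed product measure.
\operatorname{streamMeasure}(\nu)
  \;=\; \bigotimes_{n \in \mathbb{N}} \Bigl( \bigotimes_{a \in \alpha} \nu(a) \Bigr),
\qquad r_{n,a} \sim \nu(a) \text{ independently over } (n, a).
```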
Probability space for the array model of stochastic bandits.
Equations
- Bandits.ArrayModel.probSpace α R = ((ℕ → ↑unitInterval) × (ℕ → α → R))
Instances For
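Concretely, a sample point of this space is a pair ω = (u, r); a hedged reading of the product type above is

```latex
\Omega \;=\; [0,1]^{\mathbb{N}} \times \bigl(R^{\alpha}\bigr)^{\mathbb{N}},
\qquad \omega = (u, r),
```

where `u n` is the uniform noise available to the algorithm at step `n`, and `r n a` is a pre-drawn reward for action `a` — in the array model all rewards are drawn up front, and (judging by the pull-count lemmas below) `r n a` is delivered the `n`-th time action `a` is pulled.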
Equations
- Bandits.ArrayModel.instMeasurableSpaceProbSpace = inferInstanceAs (MeasurableSpace ((ℕ → ↑unitInterval) × (ℕ → α → R)))
Probability measure for the array model of stochastic bandits.
Equations
Instances For
The initial action is the image of a uniform random variable under this function.
Equations
Instances For
The next action is the image of the history and a uniform random variable under this function.
Equations
- Bandits.ArrayModel.algFunction alg n = ⋯.choose
Instances For
History of actions and rewards up to time n in the array model.
Equations
- One or more equations did not get rendered due to their size.
- Bandits.ArrayModel.hist alg ω 0 = fun (x : ↥(Finset.Iic 0)) => (Bandits.ArrayModel.initAlgFunction alg (ω.1 0), ω.2 0 (Bandits.ArrayModel.initAlgFunction alg (ω.1 0)))
Instances For
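The successor equation did not render, but it presumably implements the standard array-model recursion. A sketch in the notation above, assuming `u` denotes the uniform coordinates and `r` the reward array:

```latex
A_0 = f_0(u_0), \quad X_0 = r_{0,\,A_0}; \qquad
A_{n+1} = f_{n+1}(H_n, u_{n+1}), \quad
X_{n+1} = r_{\,N_n(A_{n+1}),\, A_{n+1}},
```

where `f₀` is `initAlgFunction`, `f_{n+1}` is `algFunction`, `H_n` is `hist alg ω n`, and `N_n(a) = #{k ≤ n : A_k = a}` counts the pulls of `a` up to time `n` — so each pull of an action reads the next unused entry of that action's column.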
Action taken at time n in the array model.
Equations
- Bandits.ArrayModel.action alg n ω = (Bandits.ArrayModel.hist alg ω n ⟨n, ⋯⟩).1
Instances For
Reward received at time n in the array model.
Equations
- Bandits.ArrayModel.reward alg n ω = (Bandits.ArrayModel.hist alg ω n ⟨n, ⋯⟩).2
Instances For
All random variables in the space, except for the rewards for action a that are still unseen after time n.
Equations
- One or more equations did not get rendered due to their size.
Instances For
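The point of this construction: under the product measure, the omitted entries stay independent of everything retained, and each is distributed as `ν a`. Schematically, for an index `k` past the pulls of `a` recorded up to time `n`,

```latex
r_{k,a} \;\perp\!\!\!\perp\;
\bigl(u,\; (r_{m,b})_{b \neq a},\; (r_{m,a})_{m < k}\bigr),
\qquad r_{k,a} \sim \nu(a),
```

which is what drives the conditional-distribution lemmas below.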
The conditional distribution of the reward at time n + 1, given the action at time n + 1
and the number of times that action has been pulled before time n + 1, is equal to
the kernel ν.
The conditional distribution of the reward at time n + 1, given the history up to time n,
the action at time n + 1, and the number of times that action has been pulled before time n + 1,
is equal to the kernel ν.
The reward at time n + 1 is conditionally independent of the history up to time n,
given the action at time n + 1 and the number of times that action has been pulled before
time n + 1.
The conditional distribution of the reward at time n + 1, given the history up to time n
and the action at time n + 1, is equal to the kernel ν.
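In probabilistic notation, this last statement says that for every measurable set `s` of rewards,

```latex
\mathbb{P}\bigl(X_{n+1} \in s \;\bigm|\; H_n,\; A_{n+1} = a\bigr)
  \;=\; \nu(a)(s) \quad \text{a.s.},
```

i.e. conditionally on the history and the chosen action, the next reward is drawn fresh from the kernel ν — the array model reproduces the usual sequential description of a stochastic bandit.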