Bandit #

source

noncomputable def Bandits.Bandit.stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.Kernel ({ x : ℕ // x ∈ Finset.Iic n } → α × R) (α × R)

Kernel describing the distribution of the next arm-reward pair given the history up to n.

Equations

Bandits.Bandit.stepKernel alg ν n = Learning.stepKernel alg (Learning.stationaryEnv ν) n

Instances For

source

instance Bandits.Bandit.instIsMarkovKernelForallSubtypeNatMemFinsetIicProdStepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.IsMarkovKernel (stepKernel alg ν n)

Equations

⋯ = ⋯

source

@[simp]

theorem Bandits.Bandit.fst_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

(stepKernel alg ν n).fst = alg.policy n

source

@[simp]

theorem Bandits.Bandit.snd_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

(stepKernel alg ν n).snd = ν.comp (alg.policy n)

source

noncomputable def Bandits.Bandit.trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.Measure (ℕ → α × R)

Measure on the sequence of arms pulled and rewards observed generated by the bandit.

Equations

Bandits.Bandit.trajMeasure alg ν = ProbabilityTheory.Kernel.trajMeasure (alg.p0.compProd ν) (Bandits.Bandit.stepKernel alg ν)

Instances For

source

instance Bandits.Bandit.instIsProbabilityMeasureForallNatProdTrajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.IsProbabilityMeasure (trajMeasure alg ν)

Equations

⋯ = ⋯

source

noncomputable def Bandits.Bandit.streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.Measure (ℕ → α → R)

Measure of an infinite stream of rewards from each arm.

Equations

Bandits.Bandit.streamMeasure ν = MeasureTheory.Measure.infinitePi fun (x : ℕ) => MeasureTheory.Measure.infinitePi ⇑ν

Instances For

source

instance Bandits.Bandit.instIsProbabilityMeasureForallNatForallStreamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.IsProbabilityMeasure (streamMeasure ν)

Equations

⋯ = ⋯

source

noncomputable def Bandits.Bandit.measure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.Measure ((ℕ → α × R) × (ℕ → α → R))

Joint distribution of the sequence of arm pulled and rewards, and a stream of independent rewards from all arms.

Equations

Bandits.Bandit.measure alg ν = (Bandits.Bandit.trajMeasure alg ν).prod (Bandits.Bandit.streamMeasure ν)

Instances For

source

instance Bandits.Bandit.instIsProbabilityMeasureProdForallNatForallForallMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

MeasureTheory.IsProbabilityMeasure (measure alg ν)

Equations

⋯ = ⋯

source

@[simp]

theorem Bandits.Bandit.fst_measure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

(measure alg ν).fst = trajMeasure alg ν

source

@[simp]

theorem Bandits.Bandit.snd_measure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

(measure alg ν).snd = streamMeasure ν

source

theorem hasLaw_eval_infinitePi {ι : Type u_3} {X : ι → Type u_4} {mX : (i : ι) → MeasurableSpace (X i)} (μ : (i : ι) → MeasureTheory.Measure (X i)) [hμ : ∀ (i : ι), MeasureTheory.IsProbabilityMeasure (μ i)] (i : ι) :

ProbabilityTheory.HasLaw (Function.eval i) (μ i) (MeasureTheory.Measure.infinitePi μ)

source

theorem Bandits.hasLaw_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n) (MeasureTheory.Measure.infinitePi ⇑ν) (Bandit.streamMeasure ν)

source

theorem Bandits.hasLaw_eval_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :

ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n a) (ν a) (Bandit.streamMeasure ν)

source

theorem Bandits.identDistrib_eval_eval_id_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :

ProbabilityTheory.IdentDistrib (fun (h : ℕ → α → R) => h n a) id (Bandit.streamMeasure ν) (ν a)

source

theorem Bandits.Integrable.congr_identDistrib {Ω : Type u_3} {Ω' : Type u_4} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {μ : MeasureTheory.Measure Ω} {μ' : MeasureTheory.Measure Ω'} {X : Ω → ℝ} {Y : Ω' → ℝ} (hX : MeasureTheory.Integrable X μ) (hXY : ProbabilityTheory.IdentDistrib X Y μ μ') :

MeasureTheory.Integrable Y μ'

source

theorem Bandits.integrable_eval_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) (h_int : MeasureTheory.Integrable id (ν a)) :

MeasureTheory.Integrable (fun (h : ℕ → α → ℝ) => h n a) (Bandit.streamMeasure ν)

source

theorem Bandits.integral_eval_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :

∫ (h : ℕ → α → ℝ), h n a ∂Bandit.streamMeasure ν = ∫ (x : ℝ), id x ∂ν a

source

theorem Bandits.iIndepFun_eval_streamMeasure' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n) (Bandit.streamMeasure ν)

source

theorem Bandits.iIndepFun_eval_streamMeasure'' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) :

ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n a) (Bandit.streamMeasure ν)

source

theorem Bandits.iIndepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

ProbabilityTheory.iIndepFun (fun (p : ℕ × α) (ω : ℕ → α → R) => ω p.1 p.2) (Bandit.streamMeasure ν)

source

theorem Bandits.indepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {n m : ℕ} {a b : α} (h : n ≠ m ∨ a ≠ b) :

ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) => ω n a) (fun (ω : ℕ → α → R) => ω m b) (Bandit.streamMeasure ν)

source

theorem Bandits.indepFun_eval_streamMeasure' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {a b : α} (h : a ≠ b) :

ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) (n : ℕ) => ω n a) (fun (ω : ℕ → α → R) (n : ℕ) => ω n b) (Bandit.streamMeasure ν)

source

def Bandits.arm {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :

arm n is the arm pulled at time n. This is a random variable on the measurable space ℕ → α × ℝ.

Equations

Bandits.arm n h = (h n).1

Instances For

source

def Bandits.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :

reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

Equations

Bandits.reward n h = (h n).2

Instances For

source

def Bandits.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :

{ x : ℕ // x ∈ Finset.Iic n } → α × R

hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

Equations

Bandits.hist n h i = h ↑i

Instances For

source

theorem Bandits.measurable_arm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :

Measurable (arm n)

source

theorem Bandits.measurable_arm_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :

Measurable fun (p : ℕ × (ℕ → α × R)) => arm p.1 p.2

source

theorem Bandits.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :

Measurable (reward n)

source

theorem Bandits.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :

Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2

source

theorem Bandits.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :

Measurable (hist n)

source

theorem Bandits.hist_eq_frestrictLe {α : Type u_1} {R : Type u_2} :

hist = Preorder.frestrictLe

source

def Bandits.filtration (α : Type u_3) (R : Type u_4) [MeasurableSpace α] [MeasurableSpace R] :

MeasureTheory.Filtration ℕ inferInstance

Filtration of the bandit process.

Equations

Bandits.filtration α R = MeasureTheory.Filtration.piLE

Instances For

source

theorem Bandits.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

ProbabilityTheory.HasLaw (fun (h : ℕ → α × R) => h 0) (alg.p0.compProd ν) (Bandit.trajMeasure alg ν)

source

theorem Bandits.hasLaw_arm_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :

ProbabilityTheory.HasLaw (arm 0) alg.p0 (Bandit.trajMeasure alg ν)

source

theorem Bandits.condDistrib_arm_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

⇑(ProbabilityTheory.condDistrib (fun (h : ℕ → α × R) => (arm (n + 1) h, reward (n + 1) h)) (hist n) (Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (hist n) (Bandit.trajMeasure alg ν)] ⇑(Bandit.stepKernel alg ν n)

source

theorem Bandits.condDistrib_reward' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

⇑(ProbabilityTheory.condDistrib (reward (n + 1)) (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν)] ⇑(ProbabilityTheory.Kernel.prodMkLeft ({ x : ℕ // x ∈ Finset.Iic n } → α × R) ν)

source

theorem Bandits.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

⇑(ProbabilityTheory.condDistrib (reward n) (arm n) (Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (arm n) (Bandit.trajMeasure alg ν)] ⇑ν

source

theorem Bandits.condDistrib_arm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

⇑(ProbabilityTheory.condDistrib (arm (n + 1)) (hist n) (Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (hist n) (Bandit.trajMeasure alg ν)] ⇑(alg.policy n)

source

theorem Bandits.condIndepFun_reward_hist_arm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Learning.Algorithm α R} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

reward (n + 1) ⟂ᵢ[arm (n + 1), ⋯; Bandit.trajMeasure alg ν] hist n

The reward at time n+1 is independent of the history up to time n given the arm at n+1.

source

theorem Bandits.HasLaw_arm_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] :

ProbabilityTheory.HasLaw (arm 0) (MeasureTheory.Measure.dirac arm0) (Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν)

source

theorem Bandits.arm_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] [MeasurableSingletonClass α] :

arm 0 =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (x : ℕ → α × R) => arm0

source

theorem Bandits.arm_detAlgorithm_ae_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :

arm (n + 1) =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (h : ℕ → α × R) => nextArm n fun (i : { x : ℕ // x ∈ Finset.Iic n }) => h ↑i

Documentation

LeanBandits.Bandit

Bandit #