Bandit
noncomputable def
Bandits.Bandit.stepKernel
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.Kernel ({ x : ℕ // x ∈ Finset.Iic n } → α × R) (α × R)
Kernel describing the distribution of the next arm-reward pair, given the history up to time n.
Equations
- Bandits.Bandit.stepKernel alg ν n = Learning.stepKernel alg (Learning.stationaryEnv ν) n
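Informally, and as made precise by condDistrib_arm and condDistrib_reward' below (this is a reading of the kernel, not its definitional unfolding): given a history h of the first n + 1 arm-reward pairs, stepKernel alg ν n first draws the next arm a from alg.policy n applied to h and then draws the next reward from ν a, i.e. stepKernel alg ν n (h, d(a, r)) = (alg.policy n) (h, da) · ν (a, dr).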
instance
Bandits.Bandit.instIsMarkovKernelForallSubtypeNatMemFinsetIicProdStepKernel
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.IsMarkovKernel (stepKernel alg ν n)
@[simp]
theorem
Bandits.Bandit.fst_stepKernel
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
@[simp]
theorem
Bandits.Bandit.snd_stepKernel
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
noncomputable def
Bandits.Bandit.trajMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.Measure (ℕ → α × R)
Measure on the sequence of arms pulled and rewards observed that is generated by the bandit.
Equations
- Bandits.Bandit.trajMeasure alg ν = ProbabilityTheory.Kernel.trajMeasure (alg.p0.compProd ν) (Bandits.Bandit.stepKernel alg ν)
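trajMeasure alg ν is built with ProbabilityTheory.Kernel.trajMeasure, the Ionescu-Tulcea construction: the arm-reward pair at time 0 has law alg.p0.compProd ν (hasLaw_step_zero below), and conditionally on the history up to time n, the pair at time n + 1 has law stepKernel alg ν n applied to that history (condDistrib_arm_reward below). A minimal usage sketch, assuming only the declarations on this page: the trajectory measure is a probability measure by instance resolution.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R)
    [ProbabilityTheory.IsMarkovKernel ν] :
    MeasureTheory.IsProbabilityMeasure (Bandits.Bandit.trajMeasure alg ν) :=
  inferInstance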
instance
Bandits.Bandit.instIsProbabilityMeasureForallNatProdTrajMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.IsProbabilityMeasure (Bandit.trajMeasure alg ν)
noncomputable def
Bandits.Bandit.streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.Measure (ℕ → α → R)
Measure of an infinite stream of rewards from each arm.
Equations
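Under Bandit.streamMeasure ν, the rewards h n a are mutually independent in the pair (n, a) (iIndepFun_eval_streamMeasure below) and each h n a has law ν a (hasLaw_eval_eval_streamMeasure below). In particular, for every measurable set s, Bandit.streamMeasure ν {h | h n a ∈ s} = ν a s.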
instance
Bandits.Bandit.instIsProbabilityMeasureForallNatForallStreamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.IsProbabilityMeasure (Bandit.streamMeasure ν)
noncomputable def
Bandits.Bandit.measure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.Measure ((ℕ → α × R) × (ℕ → α → R))
Joint distribution of the sequence of arms pulled and rewards observed, and of a stream of independent rewards from all arms.
Equations
- Bandits.Bandit.measure alg ν = (Bandits.Bandit.trajMeasure alg ν).prod (Bandits.Bandit.streamMeasure ν)
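Since Bandit.measure alg ν is a product measure, the bandit trajectory (first component) and the reward stream (second component) are independent under it, and its two marginals are Bandit.trajMeasure alg ν and Bandit.streamMeasure ν (fst_measure and snd_measure below).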
instance
Bandits.Bandit.instIsProbabilityMeasureProdForallNatForallForallMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
MeasureTheory.IsProbabilityMeasure (Bandit.measure alg ν)
@[simp]
theorem
Bandits.Bandit.fst_measure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
(Bandit.measure alg ν).fst = Bandit.trajMeasure alg ν
@[simp]
theorem
Bandits.Bandit.snd_measure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
(Bandit.measure alg ν).snd = Bandit.streamMeasure ν
theorem
hasLaw_eval_infinitePi
{ι : Type u_3}
{X : ι → Type u_4}
{mX : (i : ι) → MeasurableSpace (X i)}
(μ : (i : ι) → MeasureTheory.Measure (X i))
[hμ : ∀ (i : ι), MeasureTheory.IsProbabilityMeasure (μ i)]
(i : ι)
:
ProbabilityTheory.HasLaw (fun (x : (i : ι) → X i) => x i) (μ i) (MeasureTheory.Measure.infinitePi μ)
theorem
Bandits.hasLaw_eval_streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n) (MeasureTheory.Measure.infinitePi ⇑ν) (Bandit.streamMeasure ν)
theorem
Bandits.hasLaw_eval_eval_streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
(a : α)
:
ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n a) (ν a) (Bandit.streamMeasure ν)
theorem
Bandits.identDistrib_eval_eval_id_streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
(a : α)
:
ProbabilityTheory.IdentDistrib (fun (h : ℕ → α → R) => h n a) id (Bandit.streamMeasure ν) (ν a)
theorem
Bandits.Integrable.congr_identDistrib
{Ω : Type u_3}
{Ω' : Type u_4}
{mΩ : MeasurableSpace Ω}
{mΩ' : MeasurableSpace Ω'}
{μ : MeasureTheory.Measure Ω}
{μ' : MeasureTheory.Measure Ω'}
{X : Ω → ℝ}
{Y : Ω' → ℝ}
(hX : MeasureTheory.Integrable X μ)
(hXY : ProbabilityTheory.IdentDistrib X Y μ μ')
:
MeasureTheory.Integrable Y μ'
theorem
Bandits.integrable_eval_streamMeasure
{α : Type u_1}
{mα : MeasurableSpace α}
(ν : ProbabilityTheory.Kernel α ℝ)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
(a : α)
(h_int : MeasureTheory.Integrable id (ν a))
:
MeasureTheory.Integrable (fun (h : ℕ → α → ℝ) => h n a) (Bandit.streamMeasure ν)
theorem
Bandits.integral_eval_streamMeasure
{α : Type u_1}
{mα : MeasurableSpace α}
(ν : ProbabilityTheory.Kernel α ℝ)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
(a : α)
:
∫ (h : ℕ → α → ℝ), h n a ∂Bandit.streamMeasure ν = ∫ (x : ℝ), x ∂ν a
theorem
Bandits.iIndepFun_eval_streamMeasure'
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n) (Bandit.streamMeasure ν)
theorem
Bandits.iIndepFun_eval_streamMeasure''
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(a : α)
:
ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n a) (Bandit.streamMeasure ν)
theorem
Bandits.iIndepFun_eval_streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
ProbabilityTheory.iIndepFun (fun (p : ℕ × α) (ω : ℕ → α → R) => ω p.1 p.2) (Bandit.streamMeasure ν)
theorem
Bandits.indepFun_eval_streamMeasure
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
{n m : ℕ}
{a b : α}
(h : n ≠ m ∨ a ≠ b)
:
ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) => ω n a) (fun (ω : ℕ → α → R) => ω m b) (Bandit.streamMeasure ν)
theorem
Bandits.indepFun_eval_streamMeasure'
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
{a b : α}
(h : a ≠ b)
:
ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) (n : ℕ) => ω n a) (fun (ω : ℕ → α → R) (n : ℕ) => ω n b)
(Bandit.streamMeasure ν)
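In words: under Bandit.streamMeasure ν, the reward sequences fun n => ω n a and fun n => ω n b attached to two distinct arms a ≠ b are independent; combined with iIndepFun_eval_streamMeasure'' and hasLaw_eval_eval_streamMeasure, the reward sequence of arm a is i.i.d. with common law ν a.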
arm n is the arm pulled at time n. This is a random variable on the measurable space ℕ → α × R.
Equations
- Bandits.arm n h = (h n).1
reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.
Equations
- Bandits.reward n h = (h n).2
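For illustration only (the cumulative reward below is not a declaration of this file), arm and reward are used as random variables on trajectories; for instance, the total reward collected before time n is measurable, using Mathlib's Finset.measurable_sum:
example {α : Type*} [MeasurableSpace α] (n : ℕ) :
    Measurable (fun h : ℕ → α × ℝ => ∑ i ∈ Finset.range n, Bandits.reward i h) :=
  Finset.measurable_sum (Finset.range n) fun i _ => Bandits.measurable_reward i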
theorem
Bandits.measurable_arm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(n : ℕ)
:
Measurable (arm n)
theorem
Bandits.measurable_arm_prod
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
:
theorem
Bandits.measurable_reward
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(n : ℕ)
:
Measurable (reward n)
theorem
Bandits.measurable_reward_prod
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
:
theorem
Bandits.measurable_hist
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(n : ℕ)
:
Measurable (hist n)
Filtration of the bandit process.
Equations
theorem
Bandits.hasLaw_step_zero
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
ProbabilityTheory.HasLaw (fun (h : ℕ → α × R) => h 0) (alg.p0.compProd ν) (Bandit.trajMeasure alg ν)
theorem
Bandits.hasLaw_arm_zero
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
:
ProbabilityTheory.HasLaw (arm 0) alg.p0 (Bandit.trajMeasure alg ν)
theorem
Bandits.condDistrib_arm_reward
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
⇑(ProbabilityTheory.condDistrib (fun (h : ℕ → α × R) => (arm (n + 1) h, reward (n + 1) h)) (hist n) (Bandit.trajMeasure alg ν))
  =ᵐ[MeasureTheory.Measure.map (hist n) (Bandit.trajMeasure alg ν)] ⇑(Bandit.stepKernel alg ν n)
theorem
Bandits.condDistrib_reward'
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
⇑(ProbabilityTheory.condDistrib (reward (n + 1)) (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν))
  =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν)]
  ⇑(ProbabilityTheory.Kernel.prodMkLeft ({ x : ℕ // x ∈ Finset.Iic n } → α × R) ν)
theorem
Bandits.condDistrib_reward
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
⇑(ProbabilityTheory.condDistrib (reward n) (arm n)
(Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (arm n) (Bandit.trajMeasure alg ν)] ⇑ν
theorem
Bandits.condDistrib_arm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
(alg : Learning.Algorithm α R)
(ν : ProbabilityTheory.Kernel α R)
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
⇑(ProbabilityTheory.condDistrib (arm (n + 1)) (hist n)
(Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (hist n) (Bandit.trajMeasure alg ν)] ⇑(alg.policy n)
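Taken together, condDistrib_arm and condDistrib_reward' describe the interaction protocol under Bandit.trajMeasure alg ν: almost surely, arm (n + 1) given hist n is distributed as alg.policy n applied to the history, and reward (n + 1) given (hist n, arm (n + 1)) is distributed as ν applied to that arm; given the arm, the reward is conditionally independent of the earlier history (condIndepFun_reward_hist_arm below).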
theorem
Bandits.condIndepFun_reward_hist_arm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
{alg : Learning.Algorithm α R}
{ν : ProbabilityTheory.Kernel α R}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
The reward at time n + 1 is independent of the history up to time n, given the arm at time n + 1.
theorem
Bandits.HasLaw_arm_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextArm n)}
{arm0 : α}
{ν : ProbabilityTheory.Kernel α R}
[ProbabilityTheory.IsMarkovKernel ν]
:
ProbabilityTheory.HasLaw (arm 0) (MeasureTheory.Measure.dirac arm0)
(Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν)
theorem
Bandits.arm_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextArm n)}
{arm0 : α}
{ν : ProbabilityTheory.Kernel α R}
[ProbabilityTheory.IsMarkovKernel ν]
[MeasurableSingletonClass α]
:
arm 0 =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (x : ℕ → α × R) => arm0
theorem
Bandits.arm_detAlgorithm_ae_eq
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextArm n)}
{arm0 : α}
{ν : ProbabilityTheory.Kernel α R}
[ProbabilityTheory.IsMarkovKernel ν]
(n : ℕ)
:
arm (n + 1) =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (h : ℕ → α × R) =>
nextArm n fun (i : { x : ℕ // x ∈ Finset.Iic n }) => h ↑i
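In summary, for the deterministic algorithm detAlgorithm nextArm h_next arm0, the arm sequence is determined (trajMeasure-almost surely) by the observed history: arm 0 is arm0, and arm (n + 1) is nextArm n applied to the first n + 1 arm-reward pairs.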