Algorithms #
A stochastic, sequential algorithm.
- policy (n : ℕ) : ProbabilityTheory.Kernel (↥(Finset.Iic n) → α × R) α
  Policy or sampling rule: distribution of the next action.
- h_policy (n : ℕ) : ProbabilityTheory.IsMarkovKernel (self.policy n)
- p0 : MeasureTheory.Measure α
  Distribution of the first action.
- hp0 : MeasureTheory.IsProbabilityMeasure self.p0
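As a construction sketch, assuming this structure is named Learning.Algorithm and that the module defining it is imported (its name is not shown here): an algorithm that ignores the history entirely can be built from a single probability measure using Mathlib's Kernel.const.

import Mathlib
-- also: the import that provides `Learning.Algorithm` (module name assumed)

open MeasureTheory ProbabilityTheory

/-- Sketch: an algorithm that never looks at the history and draws every
action, including the first, from a fixed probability measure `μ`. -/
noncomputable def constAlgorithm {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (μ : Measure α) [IsProbabilityMeasure μ] :
    Learning.Algorithm α R where
  policy _ := Kernel.const _ μ  -- same action law at every history
  h_policy _ := inferInstance   -- the constant kernel of a probability measure is Markov
  p0 := μ
  hp0 := inferInstance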
A stochastic environment.
- feedback (n : ℕ) : ProbabilityTheory.Kernel ((↥(Finset.Iic n) → α × R) × α) R
  Distribution of the next observation, as a function of the past history and the current action.
- h_feedback (n : ℕ) : ProbabilityTheory.IsMarkovKernel (self.feedback n)
- ν0 : ProbabilityTheory.Kernel α R
  Distribution of the first observation given the first action.
- hp0 : ProbabilityTheory.IsMarkovKernel self.ν0
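Under the same naming assumptions (and the same imports as the sketch above), an environment whose observations are drawn from one fixed distribution, whatever the history and action, is a degenerate but type-correct instance:

/-- Sketch: every observation is drawn from the fixed probability measure `μ`,
regardless of the history or of the action played. -/
noncomputable def constEnv {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (μ : MeasureTheory.Measure R) [MeasureTheory.IsProbabilityMeasure μ] :
    Learning.Environment α R where
  feedback _ := ProbabilityTheory.Kernel.const _ μ  -- ignores history and action
  h_feedback _ := inferInstance
  ν0 := ProbabilityTheory.Kernel.const α μ          -- first observation: same law
  hp0 := inferInstance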
Kernel describing the distribution of the next action-reward pair given the history
up to n.
Equations
- Learning.stepKernel alg env n = (alg.policy n).compProd (env.feedback n)
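By the semantics of Kernel.compProd, sampling from stepKernel alg env n at a given history means: draw an action a from alg.policy n at that history, then an observation from env.feedback n at the pair of the history and a. Since the displayed equation is definitional, the following sketch (same naming assumptions as above) holds by rfl:

example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (alg : Learning.Algorithm α R) (env : Learning.Environment α R) (n : ℕ) :
    Learning.stepKernel alg env n = (alg.policy n).compProd (env.feedback n) :=
  rfl  -- definitional unfolding of `stepKernel`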
Kernel sending a partial trajectory of the bandit interaction (Iic n → α × R) to a measure
on ℕ → α × R, supported on full trajectories that start with the partial one.
Equations
- Learning.traj alg env n = ProbabilityTheory.Kernel.traj (Learning.stepKernel alg env) n
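Reading off the type, under the same assumptions: traj alg env n turns a history of the first n + 1 steps into the law of the full infinite trajectory, through Mathlib's Ionescu-Tulcea kernel ProbabilityTheory.Kernel.traj.

noncomputable example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (alg : Learning.Algorithm α R) (env : Learning.Environment α R) (n : ℕ) :
    ProbabilityTheory.Kernel (↥(Finset.Iic n) → α × R) (ℕ → α × R) :=
  Learning.traj alg env n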
Measure on the sequence of actions and observations generated by the algorithm/environment.
Equations
- Learning.trajMeasure alg env = ProbabilityTheory.Kernel.trajMeasure (alg.p0.compProd env.ν0) (Learning.stepKernel alg env)
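This is the canonical probability space of the interaction: the first action-observation pair is drawn from alg.p0.compProd env.ν0 and every later pair from the step kernels. A type-reading sketch, same assumptions:

noncomputable example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (alg : Learning.Algorithm α R) (env : Learning.Environment α R) :
    MeasureTheory.Measure (ℕ → α × R) :=
  Learning.trajMeasure alg env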
action n is the action pulled at time n. This is a random variable on the measurable space
ℕ → α × R.
Equations
- Learning.action n h = (h n).1
reward n is the reward at time n. This is a random variable on the measurable space
ℕ → α × R.
Equations
- Learning.reward n h = (h n).2
hist n is the history up to time n. This is a random variable on the measurable space
ℕ → α × R.
Equations
- Learning.hist n h i = h ↑i
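All three are plain projections, so the following identities hold definitionally (a sketch; the measurable-space binders are included only in case the library's definitions require them):

example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (h : ℕ → α × R) (n : ℕ) :
    (Learning.action n h, Learning.reward n h) = h n :=
  rfl  -- pairing the two projections recovers the trajectory entry at time n

example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (h : ℕ → α × R) (n : ℕ) (i : ↥(Finset.Iic n)) :
    Learning.hist n h i = h ↑i :=
  rfl  -- the history is the restriction of the trajectory to times ≤ n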
Filtration of the algorithm interaction.
A stationary environment, in which the distribution of the next reward depends only on the last action.
Equations
- Learning.stationaryEnv ν = { feedback := fun (x : ℕ) => ProbabilityTheory.Kernel.prodMkLeft (↥(Finset.Iic x) → α × R) ν, h_feedback := ⋯, ν0 := ν, hp0 := inst✝ }
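For instance (same naming assumptions), feeding stationaryEnv a constant kernel yields a stationary environment in which all arms are identical; a genuine stochastic bandit would instead supply a kernel ν with ν a the reward distribution of arm a.

noncomputable example {α : Type*} [MeasurableSpace α]
    (μ : MeasureTheory.Measure ℝ) [MeasureTheory.IsProbabilityMeasure μ] :
    Learning.Environment α ℝ :=
  Learning.stationaryEnv (ProbabilityTheory.Kernel.const α μ)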