
LeanBandits.SequentialLearning.IonescuTulceaSpace

Algorithms

def Learning.IT.step {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
α × R

Action and reward at step n.

Equations
Instances For
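A minimal usage sketch, assuming the import below matches the module name at the top of this page: step n consumes a whole trajectory h : ℕ → α × R and returns the action/reward pair recorded at time n. The instantiation α := ℕ, R := ℝ is only for illustration.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: `step 3 h` is the (action, reward) pair observed at time 3
-- of the trajectory `h`, here with α := ℕ and R := ℝ.
example (h : ℕ → ℕ × ℝ) : ℕ × ℝ :=
  Learning.IT.step 3 h
```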
    def Learning.IT.action {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
    α

    action n is the action pulled at time n. This is a random variable on the measurable space ℕ → α × R.

    Equations
    Instances For
      def Learning.IT.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
      R

      reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

      Equations
      Instances For
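A companion sketch for action and reward under the same assumptions: they project out the two components of the pair observed at time n.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: on a trajectory `h`, `action n h : α` is the action pulled and
-- `reward n h : R` is the reward received at time `n` (here α := ℕ, R := ℝ).
example (h : ℕ → ℕ × ℝ) (n : ℕ) : ℕ × ℝ :=
  (Learning.IT.action n h, Learning.IT.reward n h)
```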
        def Learning.IT.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
        ↥(Finset.Iic n) → α × R

        hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

        Equations
        Instances For
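A sketch for hist under the same assumptions: hist n h is the prefix of the trajectory on the time indices 0, …, n, viewed as a function on the subtype ↥(Finset.Iic n).

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: the action/reward pairs observed at times 0 through 5.
example (h : ℕ → ℕ × ℝ) : ↥(Finset.Iic 5) → ℕ × ℝ :=
  Learning.IT.hist 5 h
```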
          theorem Learning.IT.fst_comp_step {α : Type u_1} {R : Type u_2} (n : ℕ) :
          theorem Learning.IT.measurable_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (step n)
          theorem Learning.IT.measurable_step_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => step p.1 p.2
          theorem Learning.IT.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (action n)
          theorem Learning.IT.measurable_action_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => action p.1 p.2
          theorem Learning.IT.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (reward n)
          theorem Learning.IT.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
          theorem Learning.IT.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (hist n)
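These coordinate-wise measurability lemmas compose with standard Mathlib lemmas to give measurability of derived observables. A hedged sketch, assuming measurable_reward states Measurable (reward k) as its name and the _prod variant suggest: the cumulative reward up to time n is then measurable via Mathlib's Finset.measurable_sum.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Hedged sketch: the cumulative reward over times 0, …, n is a measurable function
-- of the trajectory, as a finite sum of the measurable coordinate rewards.
example {α : Type*} [MeasurableSpace α] (n : ℕ) :
    Measurable fun ω : ℕ → α × ℝ => ∑ k ∈ Finset.range (n + 1), Learning.IT.reward k ω :=
  Finset.measurable_sum _ fun k _ => Learning.IT.measurable_reward k
```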
          theorem Learning.IT.step_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          step n = (fun (x : ↥(Finset.Iic n) → α × R) => x ⟨n, ⋯⟩) ∘ hist n
          theorem Learning.IT.action_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          action n = (fun (x : ↥(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).1) ∘ hist n
          theorem Learning.IT.reward_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          reward n = (fun (x : ↥(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).2) ∘ hist n
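In more conventional notation, the three factorization lemmas above say that the time-n observables factor through the history:

$$
\mathrm{step}\,n \;=\; \mathrm{ev}_n \circ \mathrm{hist}\,n, \qquad
\mathrm{action}\,n \;=\; \pi_1 \circ \mathrm{ev}_n \circ \mathrm{hist}\,n, \qquad
\mathrm{reward}\,n \;=\; \pi_2 \circ \mathrm{ev}_n \circ \mathrm{hist}\,n,
$$

where $\mathrm{ev}_n$ evaluates a finite history on $\{0,\dots,n\}$ at the index $n$ and $\pi_1,\pi_2$ are the two projections of $\alpha \times R$. In particular, each of step n, action n and reward n is a function of hist n.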
          theorem Learning.IT.measurable_step_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          theorem Learning.IT.measurable_hist_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :

          Filtration generated by the history at time n-1 together with the action at time n.

          Equations
          • One or more equations did not get rendered due to their size.
          Instances For
            theorem Learning.IT.filtrationAction_eq_comap {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) (hn : n ≠ 0) :
            (filtrationAction α R) n = MeasurableSpace.comap (fun (ω : ℕ → α × R) => (hist (n - 1) ω, action n ω)) inferInstance
            theorem Learning.IT.filtration_le_filtrationAction_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
            (IT.filtration α R) n ≤ (filtrationAction α R) (n + 1)
            theorem Learning.IT.filtration_le_filtrationAction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : n < m) :
            (IT.filtration α R) n ≤ (filtrationAction α R) m
            theorem Learning.IT.filtrationAction_le_filtration_self {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
            (filtrationAction α R) n ≤ (IT.filtration α R) n
            theorem Learning.IT.filtrationAction_le_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : m ≤ n) :
            (filtrationAction α R) m ≤ (IT.filtration α R) n
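Read together, the comparison lemmas above interleave the two filtrations. Writing $\mathcal F_n$ for (IT.filtration α R) n and $\mathcal F^{A}_n$ for (filtrationAction α R) n:

$$
\mathcal F^{A}_n \;\le\; \mathcal F_n \;\le\; \mathcal F^{A}_{n+1} \;\le\; \mathcal F_{n+1} \;\le\; \cdots
$$

so the action-augmented σ-algebra at time $n$ sits between the history σ-algebra at time $n-1$ (with the action at time $n$ adjoined) and the full history σ-algebra at time $n$.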
            theorem Learning.IT.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
            theorem Learning.IT.hasLaw_action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
            theorem Learning.IT.condDistrib_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            𝓛[step (n + 1) | hist n; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] (stepKernel alg env n)
            theorem Learning.IT.condDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            theorem Learning.IT.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            𝓛[reward (n + 1) | fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω); trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω)) (trajMeasure alg env)] (env.feedback n)
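Under the StandardBorelSpace and Nonempty assumptions above, the two conditional-distribution lemmas condDistrib_step and condDistrib_reward can be summarized as follows: along trajMeasure alg env,

$$
\mathcal L\big(\mathrm{step}\,(n{+}1) \,\big|\, \mathrm{hist}\,n\big) \;=\; \mathrm{stepKernel\ alg\ env}\ n,
\qquad
\mathcal L\big(\mathrm{reward}\,(n{+}1) \,\big|\, \mathrm{hist}\,n,\ \mathrm{action}\,(n{+}1)\big) \;=\; \mathrm{env.feedback}\ n,
$$

with each equality holding almost everywhere with respect to the pushforward of trajMeasure alg env by the corresponding conditioning variable.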
            theorem Learning.IT.isAlgEnvSeq_trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) :