Algorithms #
action n is the action pulled at time n. This is a random variable on the measurable space ℕ → α × R.
Equations
- Learning.IT.action n h = (h n).1
Instances For
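By the equation above, `action n` is definitionally the first projection of the n-th step of the trajectory. As a sanity check, a minimal sketch (our example, not part of the library; the two measurable-space instances are assumed only so the declaration elaborates in the same context):

  example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] (ω : ℕ → α × R) (n : ℕ) :
      Learning.IT.action n ω = (ω n).1 := rfl  -- holds by the displayed defining equation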
reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.
Equations
- Learning.IT.reward n h = (h n).2
Instances For
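Dually, `reward n` is the second projection; the same sketch with `.2`:

  example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] (ω : ℕ → α × R) (n : ℕ) :
      Learning.IT.reward n ω = (ω n).2 := rfl  -- holds by the displayed defining equation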
hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.
Equations
- Learning.IT.hist n h i = h ↑i
Instances For
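The coercion `↑i` in the equation indicates that `hist n` restricts the trajectory to an initial segment of times. A sketch under the assumption that this segment is indexed by `Fin (n + 1)` (times 0 through n); if the library indexes histories differently, only the binder for `i` changes:

  example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] (ω : ℕ → α × R) (n : ℕ)
      (i : Fin (n + 1)) :  -- assumed index type
      Learning.IT.hist n ω i = ω ↑i := rfl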
theorem Learning.IT.measurable_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable (step n)
theorem Learning.IT.measurable_step_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
    Measurable fun (p : ℕ × (ℕ → α × R)) => step p.1 p.2
theorem Learning.IT.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable (action n)
theorem Learning.IT.measurable_action_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
    Measurable fun (p : ℕ × (ℕ → α × R)) => action p.1 p.2
theorem Learning.IT.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable (reward n)
theorem Learning.IT.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
    Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
theorem Learning.IT.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable (hist n)
def Learning.IT.filtration (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
    MeasureTheory.Filtration ℕ MeasurableSpace.pi

Filtration of the algorithm-environment sequence.
Equations
Instances For
theorem Learning.IT.filtration_eq_comap {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    ↑(filtration α R) n = MeasurableSpace.comap (hist n) inferInstance
theorem Learning.IT.measurable_step_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable[↑(filtration α R) n] (step n)
theorem Learning.IT.adapted_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [TopologicalSpace α] [TopologicalSpace.PseudoMetrizableSpace α] [SecondCountableTopology α] [OpensMeasurableSpace α] [TopologicalSpace R] [TopologicalSpace.PseudoMetrizableSpace R] [SecondCountableTopology R] [OpensMeasurableSpace R] :
    MeasureTheory.Adapted (filtration α R) step
theorem Learning.IT.measurable_hist_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable[↑(filtration α R) n] (hist n)
theorem Learning.IT.adapted_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [TopologicalSpace α] [TopologicalSpace.PseudoMetrizableSpace α] [SecondCountableTopology α] [OpensMeasurableSpace α] [TopologicalSpace R] [TopologicalSpace.PseudoMetrizableSpace R] [SecondCountableTopology R] [OpensMeasurableSpace R] :
    ∀ (n : ℕ), MeasureTheory.StronglyMeasurable[↑(filtration α R) n] (hist n)
theorem Learning.IT.measurable_action_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable[↑(filtration α R) n] (action n)
theorem Learning.IT.adapted_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [TopologicalSpace α] [TopologicalSpace.PseudoMetrizableSpace α] [SecondCountableTopology α] [OpensMeasurableSpace α] :
    MeasureTheory.Adapted (filtration α R) action
theorem Learning.IT.measurable_reward_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable[↑(filtration α R) n] (reward n)
theorem Learning.IT.adapted_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [TopologicalSpace R] [TopologicalSpace.PseudoMetrizableSpace R] [SecondCountableTopology R] [OpensMeasurableSpace R] :
    MeasureTheory.Adapted (filtration α R) reward
def Learning.IT.filtrationAction (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
    MeasureTheory.Filtration ℕ MeasurableSpace.pi
Filtration generated by the history at time n-1 together with the action at time n.
Equations
- One or more equations did not get rendered due to their size.
Instances For
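For intuition, `filtrationAction_eq_comap` below identifies the σ-algebra at any positive time n with the pullback along ω ↦ (hist (n - 1) ω, action n ω). A sketch instantiating it at n = 1 (our example; it uses nothing beyond the statement displayed below, with `1 - 1` reducing to `0`):

  example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] :
      ↑(Learning.IT.filtrationAction α R) 1
        = MeasurableSpace.comap
            (fun (ω : ℕ → α × R) => (Learning.IT.hist 0 ω, Learning.IT.action 1 ω)) inferInstance :=
    Learning.IT.filtrationAction_eq_comap 1 one_ne_zero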
theorem Learning.IT.filtrationAction_zero_eq_comap {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
    ↑(filtrationAction α R) 0 = MeasurableSpace.comap (action 0) inferInstance
theorem Learning.IT.filtrationAction_eq_comap {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) (hn : n ≠ 0) :
    ↑(filtrationAction α R) n = MeasurableSpace.comap (fun (ω : ℕ → α × R) => (hist (n - 1) ω, action n ω)) inferInstance
theorem Learning.IT.filtration_le_filtrationAction_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    ↑(filtration α R) n ≤ ↑(filtrationAction α R) (n + 1)
theorem Learning.IT.filtration_le_filtrationAction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : n < m) :
    ↑(filtration α R) n ≤ ↑(filtrationAction α R) m
theorem Learning.IT.filtrationAction_le_filtration_self {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    ↑(filtrationAction α R) n ≤ ↑(filtration α R) n
theorem Learning.IT.filtrationAction_le_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : m ≤ n) :
    ↑(filtrationAction α R) m ≤ ↑(filtration α R) n
theorem Learning.IT.measurable_action_filtrationAction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
    Measurable[↑(filtrationAction α R) n] (action n)
theorem Learning.IT.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
    ProbabilityTheory.HasLaw (step 0) (alg.p0.compProd env.ν0) (trajMeasure alg env)
theorem Learning.IT.hasLaw_action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
    ProbabilityTheory.HasLaw (action 0) alg.p0 (trajMeasure alg env)
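Unfolding `ProbabilityTheory.HasLaw`, this says that the pushforward of the trajectory measure along the first action is the algorithm's initial distribution. A sketch, assuming `HasLaw` exposes its defining equality as the field `map_eq` and that the names used here are reachable by opening the `Learning` namespaces:

  open Learning Learning.IT in
  example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
      (alg : Algorithm α R) (env : Environment α R) :
      (trajMeasure alg env).map (action 0) = alg.p0 :=
    (hasLaw_action_zero alg env).map_eq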
theorem Learning.IT.condDistrib_reward_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) :
    ⇑𝓛[reward 0 | action 0; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (action 0) (trajMeasure alg env)] ⇑env.ν0
theorem Learning.IT.condDistrib_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
    ⇑𝓛[step (n + 1) | hist n; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] ⇑(stepKernel alg env n)
theorem Learning.IT.condDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
    ⇑𝓛[action (n + 1) | hist n; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] ⇑(alg.policy n)
theorem Learning.IT.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
theorem Learning.IT.isAlgEnvSeq_trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) :
    IsAlgEnvSeq action reward alg env (trajMeasure alg env)