Deterministic algorithms #
noncomputable def
Learning.detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α)
(h_next : ∀ (n : ℕ), Measurable (nextAction n))
(action0 : α)
:
Algorithm α R
A deterministic algorithm, which chooses the action given by the function nextAction.
Equations
- One or more equations did not get rendered due to their size.
Instances For
@[simp]
theorem
Learning.detAlgorithm_p0
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α)
(h_next : ∀ (n : ℕ), Measurable (nextAction n))
(action0 : α)
:
@[simp]
theorem
Learning.detAlgorithm_policy
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
(nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α)
(h_next : ∀ (n : ℕ), Measurable (nextAction n))
(action0 : α)
(n : ℕ)
:
(detAlgorithm nextAction h_next action0).policy n = ProbabilityTheory.Kernel.deterministic (nextAction n) ⋯
theorem
Learning.IsAlgEnvSeq.HasLaw_action_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
{Ω : Type u_3}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → α}
{R' : ℕ → Ω → R}
(h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P)
:
ProbabilityTheory.HasLaw (A 0) (MeasureTheory.Measure.dirac action0) P
theorem
Learning.IsAlgEnvSeq.action_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
{Ω : Type u_3}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → α}
{R' : ℕ → Ω → R}
(h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P)
:
theorem
Learning.IsAlgEnvSeq.action_detAlgorithm_ae_eq
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
{Ω : Type u_3}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → α}
{R' : ℕ → Ω → R}
(h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P)
(n : ℕ)
:
theorem
Learning.IsAlgEnvSeq.action_detAlgorithm_ae_all_eq
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
{Ω : Type u_3}
{mΩ : MeasurableSpace Ω}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
{P : MeasureTheory.Measure Ω}
[MeasureTheory.IsProbabilityMeasure P]
{A : ℕ → Ω → α}
{R' : ℕ → Ω → R}
(h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P)
:
theorem
Learning.IT.HasLaw_action_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
:
ProbabilityTheory.HasLaw (action 0) (MeasureTheory.Measure.dirac action0)
(trajMeasure (detAlgorithm nextAction h_next action0) env)
theorem
Learning.IT.action_zero_detAlgorithm
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
[MeasurableSingletonClass α]
:
action 0 =ᵐ[trajMeasure (detAlgorithm nextAction h_next action0) env] fun (x : ℕ → α × R) => action0
theorem
Learning.IT.action_detAlgorithm_ae_eq
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
(n : ℕ)
:
action (n + 1) =ᵐ[trajMeasure (detAlgorithm nextAction h_next action0) env] fun (h : ℕ → α × R) =>
nextAction n (hist n h)
theorem
Learning.IT.action_detAlgorithm_ae_all_eq
{α : Type u_1}
{R : Type u_2}
{mα : MeasurableSpace α}
{mR : MeasurableSpace R}
{nextAction : (n : ℕ) → (↥(Finset.Iic n) → α × R) → α}
{h_next : ∀ (n : ℕ), Measurable (nextAction n)}
{action0 : α}
{env : Environment α R}
[StandardBorelSpace α]
[Nonempty α]
[StandardBorelSpace R]
[Nonempty R]
: