Documentation

LeanBandits.SequentialLearning.Deterministic

Deterministic algorithms #

noncomputable def Learning.detAlgorithm {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (nextAction : (n : ) → ((Finset.Iic n)α × R)α) (h_next : ∀ (n : ), Measurable (nextAction n)) (action0 : α) :

A deterministic algorithm, which chooses the action given by the function nextAction.

Equations
  • One or more equations did not get rendered due to their size.
Instances For
    @[simp]
    theorem Learning.detAlgorithm_p0 {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (nextAction : (n : ) → ((Finset.Iic n)α × R)α) (h_next : ∀ (n : ), Measurable (nextAction n)) (action0 : α) :
    (detAlgorithm nextAction h_next action0).p0 = MeasureTheory.Measure.dirac action0
    @[simp]
    theorem Learning.detAlgorithm_policy {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (nextAction : (n : ) → ((Finset.Iic n)α × R)α) (h_next : ∀ (n : ), Measurable (nextAction n)) (action0 : α) (n : ) :
    (detAlgorithm nextAction h_next action0).policy n = ProbabilityTheory.Kernel.deterministic (nextAction n)
    theorem Learning.IsAlgEnvSeq.HasLaw_action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} {Ω : Type u_3} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ωα} {R' : ΩR} (h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P) :
    theorem Learning.IsAlgEnvSeq.action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} {Ω : Type u_3} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ωα} {R' : ΩR} (h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P) :
    A 0 =ᵐ[P] fun (x : Ω) => action0
    theorem Learning.IsAlgEnvSeq.action_detAlgorithm_ae_eq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} {Ω : Type u_3} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ωα} {R' : ΩR} (h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P) (n : ) :
    A (n + 1) =ᵐ[P] fun (ω : Ω) => nextAction n (hist A R' n ω)
    theorem Learning.IsAlgEnvSeq.action_detAlgorithm_ae_all_eq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} {Ω : Type u_3} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ωα} {R' : ΩR} (h : IsAlgEnvSeq A R' (detAlgorithm nextAction h_next action0) env P) :
    ∀ᵐ (ω : Ω) P, A 0 ω = action0 ∀ (n : ), A (n + 1) ω = nextAction n (hist A R' n ω)
    theorem Learning.IT.HasLaw_action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} :
    theorem Learning.IT.action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} [MeasurableSingletonClass α] :
    action 0 =ᵐ[trajMeasure (detAlgorithm nextAction h_next action0) env] fun (x : α × R) => action0
    theorem Learning.IT.action_detAlgorithm_ae_eq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (n : ) :
    action (n + 1) =ᵐ[trajMeasure (detAlgorithm nextAction h_next action0) env] fun (h : α × R) => nextAction n (hist n h)
    theorem Learning.IT.action_detAlgorithm_ae_all_eq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {nextAction : (n : ) → ((Finset.Iic n)α × R)α} {h_next : ∀ (n : ), Measurable (nextAction n)} {action0 : α} {env : Environment α R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] :
    ∀ᵐ (h : α × R) trajMeasure (detAlgorithm nextAction h_next action0) env, action 0 h = action0 ∀ (n : ), action (n + 1) h = nextAction n (hist n h)