
LeanBandits.SequentialLearning.IonescuTulceaSpace

Algorithms

def Learning.IT.step {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
α × R

Action and reward at step n.

Equations
Instances For
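A minimal usage sketch, assuming the import below matches the module name at the top of this page: step n consumes a whole trajectory h : ℕ → α × R and returns the action/reward pair recorded at time n. The instantiation α := ℕ, R := ℝ is only for illustration.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: `step 3 h` is the (action, reward) pair observed at time 3
-- of the trajectory `h`, here with α := ℕ and R := ℝ.
example (h : ℕ → ℕ × ℝ) : ℕ × ℝ :=
  Learning.IT.step 3 h
```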
    def Learning.IT.action {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
    α

    action n is the action pulled at time n. This is a random variable on the measurable space ℕ → α × R.

    Equations
    Instances For
      def Learning.IT.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
      R

      reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

      Equations
      Instances For
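A companion sketch for action and reward under the same assumptions: they project out the two components of the pair observed at time n.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: on a trajectory `h`, `action n h : α` is the action pulled and
-- `reward n h : R` is the reward received at time `n` (here α := ℕ, R := ℝ).
example (h : ℕ → ℕ × ℝ) (n : ℕ) : ℕ × ℝ :=
  (Learning.IT.action n h, Learning.IT.reward n h)
```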
        def Learning.IT.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
        ↥(Finset.Iic n) → α × R

        hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

        Equations
        Instances For
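A sketch for hist under the same assumptions: hist n h is the prefix of the trajectory on the time indices 0, …, n, viewed as a function on the subtype ↥(Finset.Iic n).

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Illustrative only: the action/reward pairs observed at times 0 through 5.
example (h : ℕ → ℕ × ℝ) : ↥(Finset.Iic 5) → ℕ × ℝ :=
  Learning.IT.hist 5 h
```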
          theorem Learning.IT.fst_comp_step {α : Type u_1} {R : Type u_2} (n : ℕ) :
          theorem Learning.IT.measurable_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (step n)
          theorem Learning.IT.measurable_step_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => step p.1 p.2
          theorem Learning.IT.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (action n)
          theorem Learning.IT.measurable_action_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => action p.1 p.2
          theorem Learning.IT.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (reward n)
          theorem Learning.IT.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
          Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
          theorem Learning.IT.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          Measurable (hist n)
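These coordinate-wise measurability lemmas compose with standard Mathlib lemmas to give measurability of derived observables. A hedged sketch, assuming measurable_reward states Measurable (reward k) as its name and the _prod variant suggest: the cumulative reward up to time n is then measurable via Mathlib's Finset.measurable_sum.

```lean
import LeanBandits.SequentialLearning.IonescuTulceaSpace

-- Hedged sketch: the cumulative reward over times 0, …, n is a measurable function
-- of the trajectory, as a finite sum of the measurable coordinate rewards.
example {α : Type*} [MeasurableSpace α] (n : ℕ) :
    Measurable fun ω : ℕ → α × ℝ => ∑ k ∈ Finset.range (n + 1), Learning.IT.reward k ω :=
  Finset.measurable_sum _ fun k _ => Learning.IT.measurable_reward k
```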
          theorem Learning.IT.step_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          step n = (fun (x : ↥(Finset.Iic n) → α × R) => x ⟨n, ⋯⟩) ∘ hist n
          theorem Learning.IT.action_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          action n = (fun (x : ↥(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).1) ∘ hist n
          theorem Learning.IT.reward_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
          reward n = (fun (x : ↥(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).2) ∘ hist n
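In more conventional notation, the three factorization lemmas above say that the time-n observables factor through the history:

$$
\mathrm{step}\,n \;=\; \mathrm{ev}_n \circ \mathrm{hist}\,n, \qquad
\mathrm{action}\,n \;=\; \pi_1 \circ \mathrm{ev}_n \circ \mathrm{hist}\,n, \qquad
\mathrm{reward}\,n \;=\; \pi_2 \circ \mathrm{ev}_n \circ \mathrm{hist}\,n,
$$

where $\mathrm{ev}_n$ evaluates a finite history on $\{0,\dots,n\}$ at the index $n$ and $\pi_1,\pi_2$ are the two projections of $\alpha \times R$. In particular, each of step n, action n and reward n is a function of hist n.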
          theorem Learning.IT.measurable_step_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
          theorem Learning.IT.measurable_hist_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :

          Filtration generated by the history at time n-1 together with the action at time n.

          Equations
          • One or more equations did not get rendered due to their size.
          Instances For
            theorem Learning.IT.filtrationAction_eq_comap {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) (hn : n ≠ 0) :
            (filtrationAction α R) n = MeasurableSpace.comap (fun (ω : ℕ → α × R) => (hist (n - 1) ω, action n ω)) inferInstance
            theorem Learning.IT.filtration_le_filtrationAction_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
            (IT.filtration α R) n ≤ (filtrationAction α R) (n + 1)
            theorem Learning.IT.filtration_le_filtrationAction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : n < m) :
            (IT.filtration α R) n ≤ (filtrationAction α R) m
            theorem Learning.IT.filtrationAction_le_filtration_self {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
            (filtrationAction α R) n ≤ (IT.filtration α R) n
            theorem Learning.IT.filtrationAction_le_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {m n : ℕ} (h : m ≤ n) :
            (filtrationAction α R) m ≤ (IT.filtration α R) n
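Read together, the comparison lemmas above interleave the two filtrations. Writing $\mathcal F_n$ for (IT.filtration α R) n and $\mathcal F^{A}_n$ for (filtrationAction α R) n:

$$
\mathcal F^{A}_n \;\le\; \mathcal F_n \;\le\; \mathcal F^{A}_{n+1} \;\le\; \mathcal F_{n+1} \;\le\; \cdots
$$

so the action-augmented σ-algebra at time $n$ sits between the history σ-algebra at time $n-1$ (with the action at time $n$ adjoined) and the full history σ-algebra at time $n$.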
            theorem Learning.IT.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
            theorem Learning.IT.hasLaw_action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
            theorem Learning.IT.condDistrib_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            𝓛[step (n + 1) | hist n; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] (stepKernel alg env n)
            theorem Learning.IT.condDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            theorem Learning.IT.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
            𝓛[reward (n + 1) | fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω); trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω)) (trajMeasure alg env)] (env.feedback n)
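Under the StandardBorelSpace and Nonempty assumptions above, the two conditional-distribution lemmas condDistrib_step and condDistrib_reward can be summarized as follows: along trajMeasure alg env,

$$
\mathcal L\big(\mathrm{step}\,(n{+}1) \,\big|\, \mathrm{hist}\,n\big) \;=\; \mathrm{stepKernel\ alg\ env}\ n,
\qquad
\mathcal L\big(\mathrm{reward}\,(n{+}1) \,\big|\, \mathrm{hist}\,n,\ \mathrm{action}\,(n{+}1)\big) \;=\; \mathrm{env.feedback}\ n,
$$

with each equality holding almost everywhere with respect to the pushforward of trajMeasure alg env by the corresponding conditioning variable.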
            theorem Learning.IT.isAlgEnvSeq_trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] [StandardBorelSpace α] [Nonempty α] (alg : Algorithm α R) (env : Environment α R) :