Documentation

LeanBandits.SequentialLearning.Algorithm

Algorithms #

structure Learning.Algorithm (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
Type (max u_4 u_5)

A stochastic, sequential algorithm.

Instances For
    structure Learning.Environment (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
    Type (max u_4 u_5)

    A stochastic environment.

    Instances For
      noncomputable def Learning.stepKernel {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
      ProbabilityTheory.Kernel ((Finset.Iic n)α × R) (α × R)

      Kernel describing the distribution of the next action-reward pair given the history up to n.

      Equations
      Instances For
        Equations
        • =
        @[simp]
        theorem Learning.fst_stepKernel {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
        (stepKernel alg env n).fst = alg.policy n
        def Learning.IsAlgEnvSeq.step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} (A : Ωα) (R' : ΩR) (n : ) (ω : Ω) :
        α × R

        Step of the algorithm-environment sequence: the action-reward pair at time n.

        Equations
        Instances For
          theorem Learning.IsAlgEnvSeq.measurable_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (n : ) (hA : Measurable (A n)) (hR' : Measurable (R' n)) :
          Measurable (step A R' n)
          def Learning.IsAlgEnvSeq.hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} (A : Ωα) (R' : ΩR) (n : ) (ω : Ω) :
          (Finset.Iic n)α × R

          History of the algorithm-environment sequence up to time n.

          Equations
          Instances For
            theorem Learning.IsAlgEnvSeq.measurable_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) (n : ) :
            Measurable (hist A R' n)
            theorem Learning.IsAlgEnvSeq.eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => x n, ) hist A R' n = step A R' n
            theorem Learning.IsAlgEnvSeq.fst_eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => (x n, ).1) hist A R' n = A n
            theorem Learning.IsAlgEnvSeq.snd_eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => (x n, ).2) hist A R' n = R' n
            structure Learning.IsAlgEnvSeq {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (A : Ωα) (R' : ΩR) (alg : Algorithm α R) (env : Environment α R) (P : MeasureTheory.Measure Ω) [MeasureTheory.IsFiniteMeasure P] :

            An algorithm-environment sequence: a sequence of actions and rewards generated by an algorithm interacting with an environment.

            Instances For
              theorem Learning.IsAlgEnvSeq.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeq A R' alg env P) :
              theorem Learning.IsAlgEnvSeq.hasCondDistrib_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeq A R' alg env P) (n : ) :
              ProbabilityTheory.HasCondDistrib (step A R' (n + 1)) (hist A R' n) (stepKernel alg env n) P
              def Learning.IsAlgEnvSeq.filtration {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :

              Filtration generated by the history up to time n.

              Equations
              Instances For
                theorem Learning.IsAlgEnvSeq.measurable_action_filtration {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) (n : ) :
                def Learning.IsAlgEnvSeq.filtrationAction {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :

                Filtration generated by the history at time n-1 together with the action at time n.

                Equations
                • One or more equations did not get rendered due to their size.
                Instances For
                  theorem Learning.IsAlgEnvSeq.filtrationAction_zero_eq_comap {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {hA : ∀ (n : ), Measurable (A n)} {hR' : ∀ (n : ), Measurable (R' n)} :
                  theorem Learning.IsAlgEnvSeq.filtrationAction_eq_comap {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {hA : ∀ (n : ), Measurable (A n)} {hR' : ∀ (n : ), Measurable (R' n)} (n : ) (hn : n 0) :
                  (filtrationAction hA hR') n = MeasurableSpace.comap (fun (ω : Ω) => (hist A R' (n - 1) ω, A n ω)) inferInstance
                  noncomputable def Learning.traj {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
                  ProbabilityTheory.Kernel ((Finset.Iic n)α × R) (α × R)

                  Kernel sending a partial trajectory of the bandit Seq Iic n → α × ℝ to a measure on ℕ → α × ℝ, supported on full trajectories that start with the partial one.

                  Equations
                  Instances For
                    Equations
                    • =
                    noncomputable def Learning.trajMeasure {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :

                    Measure on the sequence of actions and observations generated by the algorithm/environment.

                    Equations
                    Instances For
                      Equations
                      • =
                      theorem Learning.eq_trajMeasure_of_isAlgEnvSeq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A₁ : Ωα} {R₁ : ΩR} (h : IsAlgEnvSeq A₁ R₁ alg env P) :
                      MeasureTheory.Measure.map (fun (ω : Ω) (n : ) => (A₁ n ω, R₁ n ω)) P = trajMeasure alg env
                      theorem Learning.isAlgEnvSeq_unique {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} {Ω' : Type u_5} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {A₁ : Ωα} {R₁ : ΩR} {A₂ : Ω'α} {R₂ : Ω'R} (h1 : IsAlgEnvSeq A₁ R₁ alg env P) (h2 : IsAlgEnvSeq A₂ R₂ alg env P') :
                      MeasureTheory.Measure.map (fun (ω : Ω) (n : ) => (A₁ n ω, R₁ n ω)) P = MeasureTheory.Measure.map (fun (ω : Ω') (n : ) => (A₂ n ω, R₂ n ω)) P'