Documentation

LeanBandits.SequentialLearning.Algorithm

Algorithms #

structure Learning.Algorithm (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
Type (max u_4 u_5)

A stochastic, sequential algorithm.

Instances For
    structure Learning.Environment (α : Type u_4) (R : Type u_5) [MeasurableSpace α] [MeasurableSpace R] :
    Type (max u_4 u_5)

    A stochastic environment.

    Instances For
      noncomputable def Learning.stepKernel {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
      ProbabilityTheory.Kernel ((Finset.Iic n)α × R) (α × R)

      Kernel describing the distribution of the next action-reward pair given the history up to n.

      Equations
      Instances For
        Equations
        • =
        @[simp]
        theorem Learning.fst_stepKernel {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
        (stepKernel alg env n).fst = alg.policy n
        def Learning.IsAlgEnvSeq.step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} (A : Ωα) (R' : ΩR) (n : ) (ω : Ω) :
        α × R

        Step of the algorithm-environment sequence: the action-reward pair at time n.

        Equations
        Instances For
          theorem Learning.IsAlgEnvSeq.measurable_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (n : ) (hA : Measurable (A n)) (hR' : Measurable (R' n)) :
          Measurable (step A R' n)
          def Learning.IsAlgEnvSeq.hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} (A : Ωα) (R' : ΩR) (n : ) (ω : Ω) :
          (Finset.Iic n)α × R

          History of the algorithm-environment sequence up to time n.

          Equations
          Instances For
            theorem Learning.IsAlgEnvSeq.measurable_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) (n : ) :
            Measurable (hist A R' n)
            theorem Learning.IsAlgEnvSeq.eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => x n, ) hist A R' n = step A R' n
            theorem Learning.IsAlgEnvSeq.fst_eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => (x n, ).1) hist A R' n = A n
            theorem Learning.IsAlgEnvSeq.snd_eval_comp_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} {A : Ωα} {R' : ΩR} (n : ) :
            (fun (x : (Finset.Iic n)α × R) => (x n, ).2) hist A R' n = R' n
            structure Learning.IsAlgEnvSeq {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (A : Ωα) (R' : ΩR) (alg : Algorithm α R) (env : Environment α R) (P : MeasureTheory.Measure Ω) [MeasureTheory.IsFiniteMeasure P] :

            An algorithm-environment sequence: a sequence of actions and rewards generated by an algorithm interacting with an environment.

            Instances For
              structure Learning.IsAlgEnvSeqUntil {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (A : Ωα) (R' : ΩR) (alg : Algorithm α R) (env : Environment α R) (P : MeasureTheory.Measure Ω) [MeasureTheory.IsFiniteMeasure P] (N : ) :

              An algorithm-environment sequence: a sequence of actions and rewards generated by an algorithm interacting with an environment.

              Instances For
                theorem Learning.IsAlgEnvSeqUntil.mono {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] {N : } [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeqUntil A R' alg env P N) {N' : } (hN : N' N) :
                IsAlgEnvSeqUntil A R' alg env P N'
                theorem Learning.IsAlgEnvSeq.isAlgEnvSeqUntil {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeq A R' alg env P) (N : ) :
                IsAlgEnvSeqUntil A R' alg env P N
                theorem Learning.IsAlgEnvSeq.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeq A R' alg env P) :
                theorem Learning.IsAlgEnvSeqUntil.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] {N : } [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeqUntil A R' alg env P N) :
                theorem Learning.IsAlgEnvSeq.hasCondDistrib_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeq A R' alg env P) (n : ) :
                ProbabilityTheory.HasCondDistrib (step A R' (n + 1)) (hist A R' n) (stepKernel alg env n) P
                theorem Learning.IsAlgEnvSeqUntil.hasCondDistrib_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsFiniteMeasure P] {N : } [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (h : IsAlgEnvSeqUntil A R' alg env P N) (n : ) (hn : n < N) :
                def Learning.IsAlgEnvSeq.filtration {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :

                Filtration generated by the history up to time n.

                Equations
                Instances For
                  theorem Learning.IsAlgEnvSeq.adapted_hist {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :
                  theorem Learning.IsAlgEnvSeq.adapted_step {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :
                  theorem Learning.IsAlgEnvSeq.adapted_action {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :
                  theorem Learning.IsAlgEnvSeq.adapted_reward {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :
                  def Learning.IsAlgEnvSeq.filtrationAction {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} (hA : ∀ (n : ), Measurable (A n)) (hR' : ∀ (n : ), Measurable (R' n)) :

                  Filtration generated by the history at time n-1 together with the action at time n.

                  Equations
                  • One or more equations did not get rendered due to their size.
                  Instances For
                    theorem Learning.IsAlgEnvSeq.filtrationAction_zero_eq_comap {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {hA : ∀ (n : ), Measurable (A n)} {hR' : ∀ (n : ), Measurable (R' n)} :
                    theorem Learning.IsAlgEnvSeq.filtrationAction_eq_comap {α : Type u_1} {R : Type u_2} {Ω : Type u_3} { : MeasurableSpace α} {mR : MeasurableSpace R} { : MeasurableSpace Ω} {A : Ωα} {R' : ΩR} {hA : ∀ (n : ), Measurable (A n)} {hR' : ∀ (n : ), Measurable (R' n)} (n : ) (hn : n 0) :
                    (filtrationAction hA hR') n = MeasurableSpace.comap (fun (ω : Ω) => (hist A R' (n - 1) ω, A n ω)) inferInstance
                    noncomputable def Learning.traj {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ) :
                    ProbabilityTheory.Kernel ((Finset.Iic n)α × R) (α × R)

                    Kernel sending a partial trajectory of the bandit Seq Iic n → α × ℝ to a measure on ℕ → α × ℝ, supported on full trajectories that start with the partial one.

                    Equations
                    Instances For
                      Equations
                      • =
                      noncomputable def Learning.trajMeasure {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :

                      Measure on the sequence of actions and observations generated by the algorithm/environment.

                      Equations
                      Instances For
                        Equations
                        • =
                        theorem Learning.eq_trajMeasure_of_isAlgEnvSeq {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A₁ : Ωα} {R₁ : ΩR} (h : IsAlgEnvSeq A₁ R₁ alg env P) :
                        MeasureTheory.Measure.map (fun (ω : Ω) (n : ) => (A₁ n ω, R₁ n ω)) P = trajMeasure alg env
                        theorem Learning.eq_trajMeasure_map_frestrictLe_of_isAlgEnvSeqUntil {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} { : MeasurableSpace Ω} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A₁ : Ωα} {R₁ : ΩR} {N : } (h : IsAlgEnvSeqUntil A₁ R₁ alg env P N) :
                        MeasureTheory.Measure.map (fun (ω : Ω) (n : (Finset.Iic N)) => (A₁ (↑n) ω, R₁ (↑n) ω)) P = MeasureTheory.Measure.map (Preorder.frestrictLe N) (trajMeasure alg env)
                        theorem Learning.isAlgEnvSeq_unique {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} {Ω' : Type u_5} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {A₁ : Ωα} {R₁ : ΩR} {A₂ : Ω'α} {R₂ : Ω'R} (h1 : IsAlgEnvSeq A₁ R₁ alg env P) (h2 : IsAlgEnvSeq A₂ R₂ alg env P') :
                        MeasureTheory.Measure.map (fun (ω : Ω) (n : ) => (A₁ n ω, R₁ n ω)) P = MeasureTheory.Measure.map (fun (ω : Ω') (n : ) => (A₂ n ω, R₂ n ω)) P'

                        The law of the sequence of actions and observations generated by an algorithm-environment pair is unique: it does not depend on the probability space used.

                        theorem Learning.isAlgEnvSeqUntil_unique {α : Type u_1} {R : Type u_2} { : MeasurableSpace α} {mR : MeasurableSpace R} {Ω : Type u_4} {Ω' : Type u_5} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] {alg : Algorithm α R} {env : Environment α R} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {A₁ : Ωα} {R₁ : ΩR} {A₂ : Ω'α} {R₂ : Ω'R} {N : } (h1 : IsAlgEnvSeqUntil A₁ R₁ alg env P N) (h2 : IsAlgEnvSeqUntil A₂ R₂ alg env P' N) :
                        MeasureTheory.Measure.map (fun (ω : Ω) (n : (Finset.Iic N)) => (A₁ (↑n) ω, R₁ (↑n) ω)) P = MeasureTheory.Measure.map (fun (ω : Ω') (n : (Finset.Iic N)) => (A₂ (↑n) ω, R₂ (↑n) ω)) P'