
LeanBandits.SequentialLearning.Algorithm

Algorithms

structure Learning.Algorithm (α : Type u_3) (R : Type u_4) [MeasurableSpace α] [MeasurableSpace R] :
Type (max u_3 u_4)

A stochastic, sequential algorithm.

Instances For
    structure Learning.Environment (α : Type u_3) (R : Type u_4) [MeasurableSpace α] [MeasurableSpace R] :
    Type (max u_3 u_4)

    A stochastic environment.

    Instances For
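    As a hedged sketch of the intended shape of these two structures (the names `policy` and `feedback` appear in the lemmas below; the remaining field names are hypothetical, and the probability/Markov-kernel requirements are omitted): an algorithm provides a history-dependent action kernel, and an environment provides a history-dependent reward kernel.

    -- Hedged sketch only; not the library's actual definitions.
    import Mathlib

    open MeasureTheory ProbabilityTheory

    variable (α R : Type*) [MeasurableSpace α] [MeasurableSpace R]

    structure AlgorithmSketch where
      /-- distribution of the first action (hypothetical field) -/
      p0 : Measure α
      /-- kernel giving the next action from the history up to time `n` -/
      policy : (n : ℕ) → Kernel (↑(Finset.Iic n) → α × R) α

    structure EnvironmentSketch where
      /-- distribution of the first reward given the first action (hypothetical field) -/
      ν0 : Kernel α R
      /-- kernel giving the reward at time `n + 1` from the history up to `n` and the new action -/
      feedback : (n : ℕ) → Kernel ((↑(Finset.Iic n) → α × R) × α) R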
      noncomputable def Learning.stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
      ProbabilityTheory.Kernel (↑(Finset.Iic n) → α × R) (α × R)

      Kernel describing the distribution of the next action-reward pair given the history up to n.

      Equations
      Instances For
        Equations
        • =
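        A natural reading of this kernel (a hedged guess, not necessarily the library's definition) is the composition-product of the policy and the feedback: first draw the next action from the policy, then the reward from the feedback given the history and that action. The fst_stepKernel lemma just below, stating that the first marginal is the policy, is consistent with this reading.

        -- Hedged sketch; `stepKernelSketch`, `policy` and `feedback` are local stand-ins
        -- with the types suggested by the lemmas on this page.
        import Mathlib

        open ProbabilityTheory

        variable {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]

        noncomputable def stepKernelSketch (n : ℕ)
            (policy : Kernel (↑(Finset.Iic n) → α × R) α)
            (feedback : Kernel ((↑(Finset.Iic n) → α × R) × α) R)
            [IsSFiniteKernel policy] [IsSFiniteKernel feedback] :
            Kernel (↑(Finset.Iic n) → α × R) (α × R) :=
          policy ⊗ₖ feedback   -- action first, then reward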
        @[simp]
        theorem Learning.fst_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
        (stepKernel alg env n).fst = alg.policy n
        noncomputable def Learning.traj {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
        ProbabilityTheory.Kernel (↑(Finset.Iic n) → α × R) (ℕ → α × R)

        Kernel sending a partial trajectory of the bandit interaction Iic n → α × ℝ to a measure on ℕ → α × ℝ, supported on full trajectories that start with the partial one.

        Equations
        Instances For
          Equations
          • =
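          As a usage sketch (the names `h` and `a₀` below are hypothetical): applying this kernel to a partial history gives a measure on full trajectories ℕ → α × R, which can be evaluated on events phrased through the random variables action, reward and hist defined further down, e.g. the event that the action at time n + 1 equals a fixed a₀.

          -- Hedged usage sketch; assumes this module is imported as below.
          import LeanBandits.SequentialLearning.Algorithm

          open Learning

          variable {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]

          example (alg : Algorithm α R) (env : Environment α R) (n : ℕ)
              (h : ↑(Finset.Iic n) → α × R) (a₀ : α) : ENNReal :=
            -- mass, under the trajectory kernel started from the partial history `h`,
            -- of the full trajectories whose action at time `n + 1` is `a₀`
            traj alg env n h {ω | action (n + 1) ω = a₀}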
          noncomputable def Learning.trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
          MeasureTheory.Measure (ℕ → α × R)

          Measure on the sequence of actions and observations generated by the algorithm/environment.

          Equations
          Instances For
            Equations
            • =
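            For example (a hedged sketch specializing the reward type to ℝ; `expectedReward` is a hypothetical name, not part of the library), the expected total reward over the first n rounds is an integral against this measure:

            -- Hedged sketch; specializes R := ℝ so rewards can be summed and integrated.
            import Mathlib
            import LeanBandits.SequentialLearning.Algorithm

            open Learning MeasureTheory

            variable {α : Type*} [MeasurableSpace α]

            /-- Hypothetical helper: expected total reward over rounds 0, …, n - 1. -/
            noncomputable def expectedReward (alg : Algorithm α ℝ) (env : Environment α ℝ) (n : ℕ) : ℝ :=
              ∫ ω, (∑ i ∈ Finset.range n, reward i ω) ∂(trajMeasure alg env)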
            def Learning.step {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
            α × R

            Action and reward at step n.

            Equations
            Instances For
              def Learning.action {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
              α

              action n is the action pulled at time n. This is a random variable on the measurable space ℕ → α × ℝ.

              Equations
              Instances For
                def Learning.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
                R

                reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

                Equations
                Instances For
                  def Learning.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
                  ↑(Finset.Iic n) → α × R

                  hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

                  Equations
                  Instances For
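                  Concretely (a hedged reading based on these docstrings and the *_eq_eval_comp_hist lemmas further down, not necessarily the exact definitions), step n h is the pair h n, action n h and reward n h are its two components, and hist n h restricts h to the times in Finset.Iic n. A small type-level usage check on a toy trajectory:

                  -- Hedged usage sketch over α := ℕ, R := ℕ (hypothetical data, for illustration only).
                  import LeanBandits.SequentialLearning.Algorithm

                  open Learning

                  example : ℕ × ℕ := step 3 (fun n => (n, n + 1))   -- the action-reward pair at time 3
                  example : ℕ := action 3 (fun n => (n, n + 1))     -- its action component
                  example : ℕ := reward 3 (fun n => (n, n + 1))     -- its reward component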
                    theorem Learning.fst_comp_step {α : Type u_1} {R : Type u_2} (n : ℕ) :
                    Prod.fst ∘ step n = action n
                    theorem Learning.measurable_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                    Measurable (step n)
                    theorem Learning.measurable_step_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
                    Measurable fun (p : ℕ × (ℕ → α × R)) => step p.1 p.2
                    theorem Learning.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                    Measurable (action n)
                    theorem Learning.measurable_action_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
                    Measurable fun (p : ℕ × (ℕ → α × R)) => action p.1 p.2
                    theorem Learning.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                    Measurable (reward n)
                    theorem Learning.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
                    Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
                    theorem Learning.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                    Measurable (hist n)

                    Filtration of the algorithm interaction.

                    Equations
                    Instances For
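                    A hedged sketch of the natural candidate for this filtration (a hypothetical stand-in, not necessarily the library's definition): the σ-algebra at time n is the one generated by the history hist n, so that the measurability lemmas below express measurability with respect to σ(hist n).

                    import LeanBandits.SequentialLearning.Algorithm

                    open Learning

                    variable {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]

                    /-- σ-algebra generated by the history up to time `n` (hypothetical stand-in). -/
                    def histSigmaAlgebra (n : ℕ) : MeasurableSpace (ℕ → α × R) :=
                      MeasurableSpace.comap (hist n) inferInstance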
                      theorem Learning.step_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
                      step n = (fun (x : ↑(Finset.Iic n) → α × R) => x ⟨n, ⋯⟩) ∘ hist n
                      theorem Learning.action_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
                      action n = (fun (x : ↑(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).1) ∘ hist n
                      theorem Learning.reward_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
                      reward n = (fun (x : ↑(Finset.Iic n) → α × R) => (x ⟨n, ⋯⟩).2) ∘ hist n
                      theorem Learning.measurable_step_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                      theorem Learning.measurable_hist_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                      theorem Learning.measurable_action_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                      theorem Learning.measurable_reward_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                      theorem Learning.condDistrib_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
                      𝓛[step (n + 1) | hist n; trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] (stepKernel alg env n)
                      theorem Learning.condDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
                      theorem Learning.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
                      𝓛[reward (n + 1) | fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω); trajMeasure alg env] =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω)) (trajMeasure alg env)] (env.feedback n)
                      theorem Learning.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
                      theorem Learning.hasLaw_action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
                      theorem Learning.condDistrib_reward_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) :

                      A stationary environment, in which the distribution of the next reward depends only on the last action.

                      Equations
                      Instances For
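                      A hedged sketch of what stationarity amounts to (a hypothetical stand-in; `stationaryFeedback` is not part of the library): such an environment is determined by a single reward kernel ν : Kernel α R, and the feedback at every time n ignores the history, querying ν with the last action only.

                      -- Hedged sketch; not the library's actual construction.
                      import Mathlib

                      open ProbabilityTheory

                      variable {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]

                      /-- Feedback kernel of a stationary environment: forget the history, keep the last action. -/
                      noncomputable def stationaryFeedback (ν : Kernel α R) (n : ℕ) :
                          Kernel ((↑(Finset.Iic n) → α × R) × α) R :=
                        ν.comap Prod.snd measurable_snd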