LeanBandits.Algorithm

Bandit

structure Learning.Algorithm (α : Type u_3) (R : Type u_4) [MeasurableSpace α] [MeasurableSpace R] :
Type (max u_3 u_4)

A stochastic, sequential algorithm.

Instances For
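The fields of this structure are not reproduced on this page. A minimal sketch of what it could look like, assuming the field names p0 and policy suggested by detAlgorithm_p0 and detAlgorithm_policy below (the actual definition may carry additional fields, such as probability requirements on p0 and the policy kernels):

import Mathlib

-- Hypothetical sketch, not the library's definition.
structure AlgorithmSketch (α R : Type*) [MeasurableSpace α] [MeasurableSpace R] where
  /-- Distribution of the action at time 0. -/
  p0 : MeasureTheory.Measure α
  /-- Distribution of the next action given the history up to time n. -/
  policy : (n : ℕ) → ProbabilityTheory.Kernel ({x : ℕ // x ∈ Finset.Iic n} → α × R) α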
structure Learning.Environment (α : Type u_3) (R : Type u_4) [MeasurableSpace α] [MeasurableSpace R] :
Type (max u_3 u_4)

A stochastic environment.

Instances For
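Its fields are likewise not shown here. Judging from condDistrib_reward below, the environment presumably carries (at least) a reward kernel feedback indexed by time, taking the history together with the chosen action to a distribution of rewards; a time-zero component is likely also present. A hedged sketch:

import Mathlib

-- Hypothetical sketch, not the library's definition; the field name `feedback`
-- is taken from `condDistrib_reward` below.
structure EnvironmentSketch (α R : Type*) [MeasurableSpace α] [MeasurableSpace R] where
  /-- Distribution of the reward at time n + 1 given the history and the action played. -/
  feedback : (n : ℕ) →
    ProbabilityTheory.Kernel (({x : ℕ // x ∈ Finset.Iic n} → α × R) × α) R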
noncomputable def Learning.stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
ProbabilityTheory.Kernel ({ x : ℕ // x ∈ Finset.Iic n } → α × R) (α × R)

Kernel describing the distribution of the next action-reward pair given the history up to n.

Equations
Instances For
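Together with fst_stepKernel below, the natural reading is that stepKernel draws the next action from the policy and then the reward from the environment feedback. A minimal sketch under the field-name assumptions above, using Mathlib's Kernel.compProd (written ⊗ₖ); Mathlib's Kernel.fst_compProd then recovers the first factor when the feedback is a Markov kernel, which would account for fst_stepKernel:

import Mathlib

open ProbabilityTheory

-- Hypothetical reconstruction: next action from the policy, then reward from the feedback.
noncomputable def stepKernelSketch {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (policy : (n : ℕ) → Kernel ({x : ℕ // x ∈ Finset.Iic n} → α × R) α)
    (feedback : (n : ℕ) → Kernel (({x : ℕ // x ∈ Finset.Iic n} → α × R) × α) R)
    (n : ℕ) :
    Kernel ({x : ℕ // x ∈ Finset.Iic n} → α × R) (α × R) :=
  policy n ⊗ₖ feedback n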
@[simp]
theorem Learning.fst_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
(stepKernel alg env n).fst = alg.policy n
noncomputable def Learning.traj {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
ProbabilityTheory.Kernel ({ x : ℕ // x ∈ Finset.Iic n } → α × R) (ℕ → α × R)

Kernel sending a partial trajectory of the bandit interaction Iic n → α × R to a measure on ℕ → α × R, supported on full trajectories that start with the partial one.

Equations
Instances For
noncomputable def Learning.trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
MeasureTheory.Measure (ℕ → α × R)

Measure on the sequence of actions and observations generated by the algorithm/environment.

Equations
Instances For
def Learning.step {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
α × R

Action and reward at step n.

Equations
Instances For
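Presumably step n simply evaluates the trajectory at time n, which is consistent with action_eq_eval_comp_hist and reward_eq_eval_comp_hist below; a one-line hedged sketch:

import Mathlib

-- Hypothetical reconstruction of `Learning.step`.
def stepSketch {α R : Type*} (n : ℕ) (h : ℕ → α × R) : α × R := h n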
def Learning.action {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
α

action n is the action pulled at time n. This is a random variable on the measurable space ℕ → α × R.

Equations
Instances For
def Learning.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
R

reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

Equations
Instances For
def Learning.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
{ x : ℕ // x ∈ Finset.Iic n } → α × R

hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

Equations
Instances For
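Likewise, action, reward and hist are presumably the two projections of the step-n pair and the restriction of the trajectory to Finset.Iic n; a hedged sketch consistent with action_eq_eval_comp_hist and reward_eq_eval_comp_hist below:

import Mathlib

-- Hypothetical reconstructions of `Learning.action`, `Learning.reward`, `Learning.hist`.
def actionSketch {α R : Type*} (n : ℕ) (h : ℕ → α × R) : α := (h n).1
def rewardSketch {α R : Type*} (n : ℕ) (h : ℕ → α × R) : R := (h n).2
def histSketch {α R : Type*} (n : ℕ) (h : ℕ → α × R) :
    {x : ℕ // x ∈ Finset.Iic n} → α × R :=
  fun i => h ↑i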
theorem Learning.fst_comp_step {α : Type u_1} {R : Type u_2} (n : ℕ) :
Prod.fst ∘ step n = action n
theorem Learning.measurable_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
Measurable (step n)
theorem Learning.measurable_step_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
Measurable fun (p : ℕ × (ℕ → α × R)) => step p.1 p.2
theorem Learning.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
Measurable (action n)
theorem Learning.measurable_action_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
Measurable fun (p : ℕ × (ℕ → α × R)) => action p.1 p.2
theorem Learning.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
Measurable (reward n)
theorem Learning.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
theorem Learning.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
Measurable (hist n)

Filtration of the algorithm interaction.

Equations
Instances For
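The declaration itself is not rendered above. The σ-algebra at time n is presumably the one generated by hist n, i.e. the comap of the product σ-algebra along hist n; these σ-algebras are monotone in n (the history up to m ≤ n is a function of the history up to n), which is what makes them a MeasureTheory.Filtration. A minimal sketch of the time-n σ-algebra, assuming the page's module is imported:

import LeanBandits.Algorithm

-- Hypothetical reading: information available after observing the history up to time n.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] (n : ℕ) :
    MeasurableSpace (ℕ → α × R) :=
  MeasurableSpace.comap (Learning.hist n) inferInstance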
theorem Learning.condDistrib_step {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
(ProbabilityTheory.condDistrib (step (n + 1)) (hist n) (trajMeasure alg env)) =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] (stepKernel alg env n)
theorem Learning.condDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
(ProbabilityTheory.condDistrib (action (n + 1)) (hist n) (trajMeasure alg env)) =ᵐ[MeasureTheory.Measure.map (hist n) (trajMeasure alg env)] (alg.policy n)
theorem Learning.condDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Algorithm α R) (env : Environment α R) (n : ℕ) :
(ProbabilityTheory.condDistrib (reward (n + 1)) (fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω)) (trajMeasure alg env)) =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, action (n + 1) ω)) (trajMeasure alg env)] (env.feedback n)
theorem Learning.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
theorem Learning.hasLaw_action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Algorithm α R) (env : Environment α R) :
noncomputable def Learning.detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α) (h_next : ∀ (n : ℕ), Measurable (nextaction n)) (action0 : α) :
Algorithm α R

A deterministic algorithm.

Equations
• One or more equations did not get rendered due to their size.
Instances For
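For example, the deterministic algorithm that ignores the history and always plays a fixed action a could presumably be obtained as follows; a hedged usage sketch assuming the page's module is imported (measurable_const is the Mathlib lemma that constant maps are measurable, and constantAlg is a hypothetical name):

import LeanBandits.Algorithm

-- Hypothetical usage example of `Learning.detAlgorithm`: always play `a`.
noncomputable def constantAlg {α R : Type*} [MeasurableSpace α] [MeasurableSpace R] (a : α) :
    Learning.Algorithm α R :=
  Learning.detAlgorithm (fun _ _ => a) (fun _ => measurable_const) a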
@[simp]
theorem Learning.detAlgorithm_policy {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α) (h_next : ∀ (n : ℕ), Measurable (nextaction n)) (action0 : α) (n : ℕ) :
(detAlgorithm nextaction h_next action0).policy n = ProbabilityTheory.Kernel.deterministic (nextaction n) ⋯
@[simp]
theorem Learning.detAlgorithm_p0 {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α) (h_next : ∀ (n : ℕ), Measurable (nextaction n)) (action0 : α) :
(detAlgorithm nextaction h_next action0).p0 = MeasureTheory.Measure.dirac action0
theorem Learning.HasLaw_action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextaction n)} {action0 : α} {env : Environment α R} :
theorem Learning.action_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextaction n)} {action0 : α} {env : Environment α R} [MeasurableSingletonClass α] :
action 0 =ᵐ[trajMeasure (detAlgorithm nextaction h_next action0) env] fun (x : ℕ → α × R) => action0
theorem Learning.action_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
action n = (fun (x : { x : ℕ // x ∈ Finset.Iic n } → α × R) => (x ⟨n, ⋯⟩).1) ∘ hist n
theorem Learning.reward_eq_eval_comp_hist {α : Type u_1} {R : Type u_2} (n : ℕ) :
reward n = (fun (x : { x : ℕ // x ∈ Finset.Iic n } → α × R) => (x ⟨n, ⋯⟩).2) ∘ hist n
theorem Learning.measurable_hist_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
theorem Learning.measurable_action_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
theorem Learning.measurable_reward_filtration {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
theorem Learning.action_detAlgorithm_ae_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextaction : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextaction n)} {action0 : α} {env : Environment α R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (n : ℕ) :
action (n + 1) =ᵐ[trajMeasure (detAlgorithm nextaction h_next action0) env] fun (h : ℕ → α × R) => nextaction n fun (i : { x : ℕ // x ∈ Finset.Iic n }) => h ↑i

A stationary environment, in which the distribution of the next reward depends only on the last action.

Equations
Instances For
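The declaration is not rendered above, but its reward kernel presumably ignores the history and applies a fixed kernel ν : Kernel α R to the last action. A hedged sketch of that feedback kernel only, using Mathlib's Kernel.comap along the second projection (stationaryFeedbackSketch is a hypothetical name, and the real definition may package this differently inside the Environment structure):

import Mathlib

open ProbabilityTheory

-- Hypothetical: the time-n feedback of a stationary environment built from ν : Kernel α R.
noncomputable def stationaryFeedbackSketch {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (ν : Kernel α R) (n : ℕ) :
    Kernel (({x : ℕ // x ∈ Finset.Iic n} → α × R) × α) R :=
  ν.comap Prod.snd measurable_snd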