Documentation

LeanBandits.Bandit

Bandit #

noncomputable def Bandits.Bandit.stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
ProbabilityTheory.Kernel ({ x : ℕ // x ∈ Finset.Iic n } → α × R) (α × R)

Kernel describing the distribution of the next arm-reward pair given the history up to n.

Equations
Instances For
    @[simp]
    theorem Bandits.Bandit.fst_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
    (stepKernel alg ν n).fst = alg.policy n
    @[simp]
    theorem Bandits.Bandit.snd_stepKernel {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
    (stepKernel alg ν n).snd = ν.comp (alg.policy n)
    noncomputable def Bandits.Bandit.trajMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
    MeasureTheory.Measure (ℕ → α × R)

    Measure on the sequence of arms pulled and rewards observed generated by the bandit.

    Equations
    Instances For
      noncomputable def Bandits.Bandit.streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
      MeasureTheory.Measure (ℕ → α → R)

      Measure of an infinite stream of rewards from each arm.

      Equations
      Instances For
        noncomputable def Bandits.Bandit.measure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
        MeasureTheory.Measure ((ℕ → α × R) × (ℕ → α → R))

        Joint distribution of the sequence of arms pulled and rewards, and a stream of independent rewards from all arms.

        Equations
        Instances For
          @[simp]
          theorem Bandits.Bandit.fst_measure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
          (measure alg ν).fst = trajMeasure alg ν
          theorem hasLaw_eval_infinitePi {ι : Type u_3} {X : ι → Type u_4} {mX : (i : ι) → MeasurableSpace (X i)} (μ : (i : ι) → MeasureTheory.Measure (X i)) [hμ : ∀ (i : ι), MeasureTheory.IsProbabilityMeasure (μ i)] (i : ι) :
          theorem Bandits.hasLaw_eval_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
          ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n a) (ν a) (Bandit.streamMeasure ν)
          theorem Bandits.identDistrib_eval_eval_id_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
          ProbabilityTheory.IdentDistrib (fun (h : ℕ → α → R) => h n a) id (Bandit.streamMeasure ν) (ν a)
          theorem Bandits.Integrable.congr_identDistrib {Ω : Type u_3} {Ω' : Type u_4} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {μ : MeasureTheory.Measure Ω} {μ' : MeasureTheory.Measure Ω'} {X : Ω → ℝ} {Y : Ω' → ℝ} (hX : MeasureTheory.Integrable X μ) (hXY : ProbabilityTheory.IdentDistrib X Y μ μ') :
          MeasureTheory.Integrable Y μ'
          theorem Bandits.integral_eval_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
          ∫ (h : ℕ → α → ℝ), h n a ∂Bandit.streamMeasure ν = ∫ (x : ℝ), id x ∂ν a
          theorem Bandits.iIndepFun_eval_streamMeasure'' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) :
          ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n a) (Bandit.streamMeasure ν)
          theorem Bandits.iIndepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
          ProbabilityTheory.iIndepFun (fun (p : ℕ × α) (ω : ℕ → α → R) => ω p.1 p.2) (Bandit.streamMeasure ν)
          theorem Bandits.indepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {n m : ℕ} {a b : α} (h : n ≠ m ∨ a ≠ b) :
          ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) => ω n a) (fun (ω : ℕ → α → R) => ω m b) (Bandit.streamMeasure ν)
          theorem Bandits.indepFun_eval_streamMeasure' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {a b : α} (h : a ≠ b) :
          ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) (n : ℕ) => ω n a) (fun (ω : ℕ → α → R) (n : ℕ) => ω n b) (Bandit.streamMeasure ν)
          def Bandits.arm {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
          α

          arm n is the arm pulled at time n. This is a random variable on the measurable space ℕ → α × R.

          Equations
          Instances For
            def Bandits.reward {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
            R

            reward n is the reward at time n. This is a random variable on the measurable space ℕ → α × R.

            Equations
            Instances For
              def Bandits.hist {α : Type u_1} {R : Type u_2} (n : ℕ) (h : ℕ → α × R) :
              { x : ℕ // x ∈ Finset.Iic n } → α × R

              hist n is the history up to time n. This is a random variable on the measurable space ℕ → α × R.

              Equations
              Instances For
                theorem Bandits.measurable_arm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                Measurable (arm n)
                theorem Bandits.measurable_arm_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
                Measurable fun (p : ℕ × (ℕ → α × R)) => arm p.1 p.2
                theorem Bandits.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                Measurable (reward n)
                theorem Bandits.measurable_reward_prod {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} :
                Measurable fun (p : ℕ × (ℕ → α × R)) => reward p.1 p.2
                theorem Bandits.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (n : ℕ) :
                Measurable (hist n)

                Filtration of the bandit process.

                Equations
                Instances For
                  theorem Bandits.hasLaw_step_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
                  ProbabilityTheory.HasLaw (fun (h : ℕ → α × R) => h 0) (alg.p0.compProd ν) (Bandit.trajMeasure alg ν)
                  theorem Bandits.condDistrib_reward' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [StandardBorelSpace α] [Nonempty α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                  ↑(ProbabilityTheory.condDistrib (reward (n + 1)) (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν)) =ᵐ[MeasureTheory.Measure.map (fun (ω : ℕ → α × R) => (hist n ω, arm (n + 1) ω)) (Bandit.trajMeasure alg ν)] ↑(ProbabilityTheory.Kernel.prodMkLeft ({ x : ℕ // x ∈ Finset.Iic n } → α × R) ν)

                  The reward at time n+1 is independent of the history up to time n given the arm at n+1.

                  theorem Bandits.HasLaw_arm_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] :
                  theorem Bandits.arm_zero_detAlgorithm {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] [MeasurableSingletonClass α] :
                  arm 0 =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (x : ℕ → α × R) => arm0
                  theorem Bandits.arm_detAlgorithm_ae_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {nextArm : (n : ℕ) → ({ x : ℕ // x ∈ Finset.Iic n } → α × R) → α} {h_next : ∀ (n : ℕ), Measurable (nextArm n)} {arm0 : α} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                  arm (n + 1) =ᵐ[Bandit.trajMeasure (Learning.detAlgorithm nextArm h_next arm0) ν] fun (h : ℕ → α × R) => nextArm n fun (i : { x : ℕ // x ∈ Finset.Iic n }) => h ↑i