LeanBandits.Bandit.Bandit

Bandit

noncomputable def Bandits.streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) :
MeasureTheory.Measure (ℕ → α → R)

Measure of an infinite stream of rewards from each action.

Equations
Instances For
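
A hedged sketch of a measure with this shape, built from Mathlib alone. `Measure.infinitePi` and the `IsProbabilityMeasure (ν a)` instance supplied by `IsMarkovKernel ν` are real Mathlib facts; that `streamMeasure` is literally this double infinite product is an assumption suggested by `hasLaw_eval_infinitePi` below.

import Mathlib

open MeasureTheory ProbabilityTheory

-- Sketch (assumed definition): one independent copy of `ν a` for every
-- time step `n : ℕ` and every action `a : α`, assembled as an infinite
-- product of infinite products on `ℕ → α → R`.
noncomputable example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (ν : Kernel α R) [IsMarkovKernel ν] : Measure (ℕ → α → R) :=
  Measure.infinitePi fun _ : ℕ => Measure.infinitePi fun a : α => ν a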
    theorem hasLaw_eval_infinitePi {ι : Type u_3} {X : ι → Type u_4} {mX : (i : ι) → MeasurableSpace (X i)} (μ : (i : ι) → MeasureTheory.Measure (X i)) [∀ (i : ι), MeasureTheory.IsProbabilityMeasure (μ i)] (i : ι) :
    ProbabilityTheory.HasLaw (fun (x : (i : ι) → X i) => x i) (μ i) (MeasureTheory.Measure.infinitePi μ)
    theorem Bandits.hasLaw_eval_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
    ProbabilityTheory.HasLaw (fun (h : ℕ → α → R) => h n a) (ν a) (streamMeasure ν)
    theorem Bandits.identDistrib_eval_eval_id_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
    ProbabilityTheory.IdentDistrib (fun (h : ℕ → α → R) => h n a) id (streamMeasure ν) (ν a)
    theorem Bandits.Integrable.congr_identDistrib {Ω : Type u_3} {Ω' : Type u_4} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {μ : MeasureTheory.Measure Ω} {μ' : MeasureTheory.Measure Ω'} {X : Ω → ℝ} {Y : Ω' → ℝ} (hX : MeasureTheory.Integrable X μ) (hXY : ProbabilityTheory.IdentDistrib X Y μ μ') :
    MeasureTheory.Integrable Y μ'
    theorem Bandits.integral_eval_streamMeasure {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
    ∫ (h : ℕ → α → ℝ), h n a ∂streamMeasure ν = ∫ (x : ℝ), id x ∂ν a
    theorem Bandits.iIndepFun_eval_streamMeasure' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
    ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n) (streamMeasure ν)
    theorem Bandits.iIndepFun_eval_streamMeasure'' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) :
    ProbabilityTheory.iIndepFun (fun (n : ℕ) (ω : ℕ → α → R) => ω n a) (streamMeasure ν)
    theorem Bandits.iIndepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] :
    ProbabilityTheory.iIndepFun (fun (p : ℕ × α) (ω : ℕ → α → R) => ω p.1 p.2) (streamMeasure ν)
    theorem Bandits.indepFun_eval_streamMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {n m : ℕ} {a b : α} (h : n ≠ m ∨ a ≠ b) :
    ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) => ω n a) (fun (ω : ℕ → α → R) => ω m b) (streamMeasure ν)
    theorem Bandits.indepFun_eval_streamMeasure' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] {a b : α} (h : a ≠ b) :
    ProbabilityTheory.IndepFun (fun (ω : ℕ → α → R) (n : ℕ) => ω n a) (fun (ω : ℕ → α → R) (n : ℕ) => ω n b) (streamMeasure ν)
    def Bandits.ArrayModel.probSpace (α : Type u_1) (R : Type u_2) :
    Type (max u_1 u_2)

    Probability space for the array model of stochastic bandits.

    Equations
    Instances For
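
The universe `Type (max u_1 u_2)` and the way `ω : probSpace α R` is used below (`ω.1 n : unitInterval` seeds the algorithm's randomization at round n; `ω.2 k a : R` is the reward revealed by the k-th pull of action a) suggest the following shape. A hedged reconstruction, not necessarily the file's literal definition:

import Mathlib

-- Sketch (assumed definition): a stream of uniform seeds paired with a
-- pre-sampled reward table indexed by (pull index, action).
example (α R : Type*) : Type _ := (ℕ → unitInterval) × (ℕ → α → R)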
      noncomputable def Bandits.ArrayModel.arrayMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) :
      MeasureTheory.Measure (probSpace α R)

      Probability measure for the array model of stochastic bandits.

      Equations
      Instances For
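
Seeds and the reward table enter the model independently, so the measure is plausibly a product of a seed-stream measure with `streamMeasure ν`. In this sketch the import path is taken from the page title, and `uniformStream` is a hypothetical stand-in for whatever uniform seed measure the file actually uses:

import LeanBandits.Bandit.Bandit

open MeasureTheory ProbabilityTheory

-- Sketch (assumed definition): independent seed stream × pre-sampled
-- reward array.
noncomputable example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    (ν : Kernel α R) [IsMarkovKernel ν]
    (uniformStream : Measure (ℕ → unitInterval)) :
    Measure ((ℕ → unitInterval) × (ℕ → α → R)) :=
  uniformStream.prod (Bandits.streamMeasure ν)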
        noncomputable def Bandits.ArrayModel.initAlgFunction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] (alg : Learning.Algorithm α R) :
        unitInterval → α

        The initial action is the image of a uniform random variable under this function.

        Equations
        Instances For
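
Concretely, `action_zero` further down the page evaluates this: the time-0 action is this function applied to the first uniform seed. A pointwise restatement (the import path is assumed from the page title):

import LeanBandits.Bandit.Bandit

open Bandits.ArrayModel

-- `action_zero` evaluated at a sample point ω.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    [Nonempty α] [StandardBorelSpace α] [DecidableEq α]
    (alg : Learning.Algorithm α R) (ω : probSpace α R) :
    action alg 0 ω = initAlgFunction alg (ω.1 0) :=
  congrFun (action_zero alg) ω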
          noncomputable def Bandits.ArrayModel.algFunction {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] (alg : Learning.Algorithm α R) (n : ℕ) :
          (↑(Finset.Iic n) → α × R) → unitInterval → α

          The next action is the image of the history and a uniform random variable under this function.

          Equations
          Instances For
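
The statement of `algFunction_map` just below did not survive rendering. A hedged guess at its content, written as a `Prop` only: pushing a uniform seed through `algFunction alg n h` should recover the algorithm's policy at history `h`. Here `uniformSeed` and the field name `alg.policy` (which does appear in `hasCondDistrib_action` at the end of this page) are assumptions:

import LeanBandits.Bandit.Bandit

open MeasureTheory Bandits.ArrayModel

-- Presumed content of `algFunction_map`, stated as a proposition only.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    [Nonempty α] [StandardBorelSpace α]
    (alg : Learning.Algorithm α R) (n : ℕ) (h : ↑(Finset.Iic n) → α × R)
    (uniformSeed : Measure unitInterval) : Prop :=
  Measure.map (algFunction alg n h) uniformSeed = (alg.policy n) h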
            theorem Bandits.ArrayModel.algFunction_map {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] (alg : Learning.Algorithm α R) (n : ℕ) (h : ↑(Finset.Iic n) → α × R) :
            noncomputable def Bandits.ArrayModel.hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) (n : ℕ) :
            ↑(Finset.Iic n) → α × R

            History of actions and rewards up to time n in the array model.

            Equations
            Instances For
              @[simp]
              theorem Bandits.ArrayModel.hist_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) :
              hist alg ω 0 = fun (x : ↑(Finset.Iic 0)) => (initAlgFunction alg (ω.1 0), ω.2 0 (initAlgFunction alg (ω.1 0)))
              theorem Bandits.ArrayModel.hist_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) (n : ℕ) :
              have a := algFunction alg n (hist alg ω n) (ω.1 (n + 1)); hist alg ω (n + 1) = fun (i : ↑(Finset.Iic (n + 1))) => if hin : ↑i ≤ n then hist alg ω n ⟨↑i, ⋯⟩ else (a, ω.2 (Learning.pullCount' n (hist alg ω n) a) a)
              theorem Bandits.ArrayModel.hist_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) (n : ℕ) :
              hist alg ω n = fun (i : ↑(Finset.Iic n)) => hist alg ω ↑i ⟨↑i, ⋯⟩
              theorem Bandits.ArrayModel.hist_add_one_eq_IicSuccProd' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) (n : ℕ) :
              have a := algFunction alg n (hist alg ω n) (ω.1 (n + 1)); hist alg ω (n + 1) = (MeasurableEquiv.IicSuccProd (fun (x : ℕ) => α × R) n).symm (hist alg ω n, a, ω.2 (Learning.pullCount' n (hist alg ω n) a) a)
              noncomputable def Bandits.ArrayModel.action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) (ω : probSpace α R) :
              α

              Action taken at time n in the array model.

              Equations
              Instances For
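
Together with `action_add_one_eq` just below, the action process is a seeded recursion: each new action is a deterministic function of the current history and a fresh uniform seed. The successor step, evaluated pointwise (import path assumed from the page title):

import LeanBandits.Bandit.Bandit

open Bandits.ArrayModel

-- `action_add_one_eq` evaluated at a sample point ω.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    [Nonempty α] [StandardBorelSpace α] [DecidableEq α]
    (alg : Learning.Algorithm α R) (n : ℕ) (ω : probSpace α R) :
    action alg (n + 1) ω = algFunction alg n (hist alg ω n) (ω.1 (n + 1)) :=
  congrFun (action_add_one_eq alg n) ω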
                theorem Bandits.ArrayModel.action_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) :
                action alg 0 = fun (ω : probSpace α R) => initAlgFunction alg (ω.1 0)
                theorem Bandits.ArrayModel.action_add_one_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) :
                action alg (n + 1) = fun (ω : probSpace α R) => algFunction alg n (hist alg ω n) (ω.1 (n + 1))
                noncomputable def Bandits.ArrayModel.reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) (ω : probSpace α R) :
                R

                Reward received at time n in the array model.

                Equations
                Instances For
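
The defining feature of the array model, spelled out by `reward_eq` below: the reward observed at time n is read from the pre-sampled table at row `pullCount` and column `action`. The time-0 case, evaluated pointwise (import path assumed from the page title):

import LeanBandits.Bandit.Bandit

open Bandits.ArrayModel

-- `reward_zero` evaluated at a sample point ω: the first reward is the
-- table entry for the 0-th pull of the initial action.
example {α R : Type*} [MeasurableSpace α] [MeasurableSpace R]
    [Nonempty α] [StandardBorelSpace α] [DecidableEq α]
    (alg : Learning.Algorithm α R) (ω : probSpace α R) :
    reward alg 0 ω = ω.2 0 (action alg 0 ω) :=
  congrFun (reward_zero alg) ω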
                  theorem Bandits.ArrayModel.reward_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) :
                  reward alg 0 = fun (ω : probSpace α R) => ω.2 0 (action alg 0 ω)
                  theorem Bandits.ArrayModel.reward_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  reward alg (n + 1) = fun (ω : probSpace α R) => ω.2 (Learning.pullCount' n (hist alg ω n) (action alg (n + 1) ω)) (action alg (n + 1) ω)
                  theorem Bandits.ArrayModel.reward_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  reward alg n = fun (ω : probSpace α R) => ω.2 (Learning.pullCount (action alg) (action alg n ω) n ω) (action alg n ω)
                  theorem Bandits.ArrayModel.measurable_action_add_one' {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] {alg : Learning.Algorithm α R} (n : ℕ) (h : Measurable fun (x : probSpace α R) => hist alg x n) :
                  Measurable fun (x : probSpace α R) => algFunction alg n (hist alg x n) (x.1 (n + 1))
                  theorem Bandits.ArrayModel.measurable_pullCount'_action_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] {alg : Learning.Algorithm α R} (n : ℕ) (h_hist : Measurable fun (x : probSpace α R) => hist alg x n) :
                  Measurable fun (x : probSpace α R) => Learning.pullCount' n (hist alg x n) (algFunction alg n (hist alg x n) (x.1 (n + 1)))
                  theorem Bandits.ArrayModel.measurable_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  Measurable fun (ω : probSpace α R) => hist alg ω n
                  theorem Bandits.ArrayModel.measurable_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  Measurable (action alg n)
                  theorem Bandits.ArrayModel.measurable_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  Measurable (reward alg n)
                  theorem Bandits.ArrayModel.hist_add_one_eq_IicSuccProd {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (ω : probSpace α R) (n : ℕ) :
                  hist alg ω (n + 1) = (MeasurableEquiv.IicSuccProd (fun (x : ℕ) => α × R) n).symm (hist alg ω n, action alg (n + 1) ω, reward alg (n + 1) ω)
                  theorem Bandits.ArrayModel.measurable_pullCount_action_add_one {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  Measurable fun (ω : probSpace α R) => Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω
                  theorem Bandits.ArrayModel.hist_congr {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) {ω ω' : probSpace α R} (hω1 : ∀ i ≤ n, ω.1 i = ω'.1 i) (hω2 : ∀ (i : ℕ) (a : α), i < Learning.pullCount (action alg) a (n + 1) ω → ω.2 i a = ω'.2 i a) :
                  hist alg ω n = hist alg ω' n
                  theorem Bandits.ArrayModel.stepsUntil_congr_aux {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (a : α) (m n : ℕ) {ω ω' : probSpace α R} (hω1 : ∀ (i : ℕ), ω.1 i = ω'.1 i) (hω2_ne : ∀ (i : ℕ) (b : α), b ≠ a → ω.2 i b = ω'.2 i b) (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → ω.2 i a = ω'.2 i a) (h_eq : action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m) :
                  action alg (n + 1) ω' = a ∧ Learning.pullCount (action alg) a (n + 1) ω' = m
                  theorem Bandits.ArrayModel.stepsUntil_congr {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (a : α) (m n : ℕ) {ω ω' : probSpace α R} (hω1 : ∀ (i : ℕ), ω.1 i = ω'.1 i) (hω2_ne : ∀ (i : ℕ) (b : α), b ≠ a → ω.2 i b = ω'.2 i b) (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → ω.2 i a = ω'.2 i a) :
                  action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m ↔ action alg (n + 1) ω' = a ∧ Learning.pullCount (action alg) a (n + 1) ω' = m
                  theorem Bandits.ArrayModel.stepsUntil_indicator_congr {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (a : α) (m n : ℕ) {ω ω' : probSpace α R} (hω1 : ∀ (i : ℕ), ω.1 i = ω'.1 i) (hω2_ne : ∀ (i : ℕ) (b : α), b ≠ a → ω.2 i b = ω'.2 i b) (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → ω.2 i a = ω'.2 i a) :
                  {ω : probSpace α R | action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m}.indicator (fun (x : probSpace α R) => 1) ω = {ω : probSpace α R | action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m}.indicator (fun (x : probSpace α R) => 1) ω'
                  theorem Bandits.ArrayModel.measurable_hist_todo {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (n : ℕ) :
                  Measurable fun (x : probSpace α R) => hist alg x n
                  noncomputable def Bandits.ArrayModel.truePast {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] (alg : Learning.Algorithm α R) (a : α) (n : ℕ) (ω : probSpace α R) :
                  probSpace α R

                  All random variables in the space, except for the unseen rewards for action a after time n.

                  Equations
                  • One or more equations did not get rendered due to their size.
                  Instances For
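
The equations below (`truePast_eq_of_pullCount_eq`) show the mechanism: in the reward table, the column of action a is clamped at index m - 1, where m is the pull count, so entries beyond the last observed pull are overwritten while every observed entry survives. A Mathlib-only sketch of that clamp:

import Mathlib

-- Clamping the index at m - 1 leaves every entry up to m - 1 unchanged.
example {R : Type*} (f : ℕ → R) (m : ℕ) :
    ∀ i ≤ m - 1, (fun j => f (min j (m - 1))) i = f i := by
  intro i hi
  simp [min_eq_left hi]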
                    theorem Bandits.ArrayModel.truePast_eq_of_pullCount_eq {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] (alg : Learning.Algorithm α R) (a : α) (n m : ℕ) (ω : probSpace α R) (h_pc : Learning.pullCount (action alg) a (n + 1) ω = m) :
                    truePast alg a n ω = (ω.1, fun (i : ℕ) (b : α) => if b = a then if m ≠ 0 then ω.2 (min i (m - 1)) a else ⋯.some else ω.2 i b)
                    theorem Bandits.ArrayModel.truePast_eq_of_pullCount_eq_of_ne_zero {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] (alg : Learning.Algorithm α R) (a : α) (n m : ℕ) (ω : probSpace α R) (h_pc : Learning.pullCount (action alg) a (n + 1) ω = m) (hm : m ≠ 0) :
                    truePast alg a n ω = (ω.1, fun (i : ℕ) (b : α) => if b = a then ω.2 (min i (m - 1)) a else ω.2 i b)
                    theorem Bandits.ArrayModel.measurable_hist_truePast {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (a : α) (n : ℕ) :
                    Measurable fun (x : probSpace α R) => hist alg x n
                    theorem Bandits.ArrayModel.measurable_action_add_one_truePast {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (a : α) (n : ℕ) :
                    Measurable (action alg (n + 1))
                    theorem Bandits.ArrayModel.measurable_stepsUntil {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (a : α) (m n : ℕ) :
                    Measurable ({ω : probSpace α R | action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m}.indicator fun (x : probSpace α R) => 1)
                    theorem Bandits.ArrayModel.measurable_pullCount_action_add_one_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] (alg : Learning.Algorithm α R) (n : ℕ) :
                    Measurable fun (ω : probSpace α R) => Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω
                    theorem Bandits.ArrayModel.map_snd_apply_arrayMeasure {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} {ν : ProbabilityTheory.Kernel α R} [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : α) :
                    MeasureTheory.Measure.map (fun (ω : probSpace α R) => ω.2 n a) (arrayMeasure ν) = ν a
                    theorem Bandits.ArrayModel.indepFun_fst_zero_snd_zero_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.1 0) (fun (ω : probSpace α R) => ω.2 0 a) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.indepFun_fst_add_one_aux {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.1 (n + 1)) (fun (ω : probSpace α R) => (fun (i : ↑(Finset.Iic n)) => ω.1 ↑i, ω.2)) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.indepFun_fst_add_one_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [StandardBorelSpace R] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.1 (n + 1)) (fun (x : probSpace α R) => hist alg x n) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.indepFun_snd_apply_aux {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [DecidableEq α] [Nonempty R] (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m : ℕ) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.2 m a) (fun (ω : probSpace α R) => (ω.1, fun (k : ℕ) (b : α) => if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b else ⋯.some else ω.2 k b)) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.indepFun_snd_apply_pullCount_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) (m n : ℕ) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.2 m a) ({ω : probSpace α R | action alg (n + 1) ω = a ∧ Learning.pullCount (action alg) a (n + 1) ω = m}.indicator fun (x : probSpace α R) => 1) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.indepFun_todo {α : Type u_3} {β : Type u_4} {γ : Type u_5} {δ : Type u_6} {mα : MeasurableSpace α} {mβ : MeasurableSpace β} {mγ : MeasurableSpace γ} {mδ : MeasurableSpace δ} [MeasurableSingletonClass δ] {μ : MeasureTheory.Measure α} {X : α → β} {Y : α → γ} (hXY : ProbabilityTheory.IndepFun X Y μ) (hY : Measurable Y) {Z : γ → δ} (hZ : Measurable Z) (z : δ) :
                    theorem Bandits.ArrayModel.indepFun_snd_hist_cond {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [StandardBorelSpace R] [Nonempty R] [Countable α] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (a : α) (n m : ℕ) :
                    ProbabilityTheory.IndepFun (fun (ω : probSpace α R) => ω.2 m a) (fun (x : probSpace α R) => hist alg x n) (arrayMeasure ν)[|(fun (ω : probSpace α R) => (action alg (n + 1) ω, Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]

                    The conditional distribution of the reward at time n + 1, given the action at time n + 1 and the number of times that action has been pulled before time n + 1, is equal to the kernel ν.

                    theorem Bandits.ArrayModel.reward_ae_eq_cond {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) (a : α) (n m : ℕ) :
                    reward alg (n + 1) =ᵐ[(arrayMeasure ν)[|(fun (ω : probSpace α R) => (action alg (n + 1) ω, Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]] fun (ω : probSpace α R) => ω.2 m a

                    The conditional distribution of the reward at time n + 1, given the history up to time n, the action at time n + 1, and the number of times that action has been pulled before time n + 1, is equal to the kernel ν.

                    theorem Bandits.ArrayModel.condIndepFun_reward_hist {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                    ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (fun (ω : probSpace α R) => (action alg (n + 1) ω, Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) inferInstance) (reward alg (n + 1)) (fun (x : probSpace α R) => hist alg x n) (arrayMeasure ν)

                    The reward at time n + 1 is conditionally independent of the history up to time n, given the action at time n + 1 and the number of times that action has been pulled before time n + 1.

                    The conditional distribution of the reward at time n + 1, given the history up to time n and the action at time n + 1, is equal to the kernel ν.

                    theorem Bandits.ArrayModel.hasCondDistrib_action {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                    ProbabilityTheory.HasCondDistrib (action alg (n + 1)) (fun (ω : probSpace α R) (i : (Finset.Iic n)) => (action alg (↑i) ω, reward alg (↑i) ω)) (alg.policy n) (arrayMeasure ν)
                    theorem Bandits.ArrayModel.hasCondDistrib_reward {α : Type u_1} {R : Type u_2} {mα : MeasurableSpace α} {mR : MeasurableSpace R} [Nonempty α] [StandardBorelSpace α] [DecidableEq α] [Countable α] [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm α R) (ν : ProbabilityTheory.Kernel α R) [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
                    ProbabilityTheory.HasCondDistrib (reward alg (n + 1)) (fun (ω : probSpace α R) => (fun (i : (Finset.Iic n)) => (action alg (↑i) ω, reward alg (↑i) ω), action alg (n + 1) ω)) ((Learning.stationaryEnv ν).feedback n) (arrayMeasure ν)