
LeanBandits.Regret

Regret #

Definitions of regret, gaps, pull counts #

noncomputable def Bandits.regret {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) (t : ℕ) (h : ℕ → α × ℝ) :
ℝ
Regret at time t of the sequence of arms pulled in the history h : ℕ → α × ℝ, for the reward kernel ν : Kernel α ℝ.

Equations
Instances For
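For intuition, the following is a minimal, self-contained sketch of a regret-like quantity written directly against Mathlib. The helper `armSketch` (reading the pulled arm off the history) and the exact shape of the formula are assumptions made for illustration; they are not the generated equation of `Bandits.regret`.

```lean
import Mathlib

open MeasureTheory ProbabilityTheory

variable {α : Type*} [MeasurableSpace α]

/-- Hypothetical helper: the arm recorded at time `t` in the history `h`. -/
def armSketch (t : ℕ) (h : ℕ → α × ℝ) : α := (h t).1

/-- Sketch of cumulative regret after `t` rounds: `t` times the best mean reward
minus the sum of the mean rewards of the arms actually pulled. -/
noncomputable def regretSketch (ν : Kernel α ℝ) (t : ℕ) (h : ℕ → α × ℝ) : ℝ :=
  (t : ℝ) * (⨆ a, ∫ x, x ∂(ν a)) - ∑ s ∈ Finset.range t, ∫ x, x ∂(ν (armSketch s h))
```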
    noncomputable def Bandits.gap {α : Type u_1} {mα : MeasurableSpace α} (ν : ProbabilityTheory.Kernel α ℝ) (a : α) :
    ℝ
    Gap of an arm a: difference between the highest mean of the arms and the mean of a.

    Equations
    Instances For
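In the same hedged spirit, here is a sketch of what the gap could unfold to: the best mean reward over all arms minus the mean reward of `a` (compare `gap_eq_bestArm_sub` further down). Again an illustration, not the library's equation.

```lean
import Mathlib

open MeasureTheory ProbabilityTheory

variable {α : Type*} [MeasurableSpace α]

/-- Sketch: the gap of arm `a` as the best mean reward over all arms minus the
mean reward of `a` itself (nonnegative once the supremum is attained,
cf. `gap_nonneg`, which assumes `Fintype α`). -/
noncomputable def gapSketch (ν : Kernel α ℝ) (a : α) : ℝ :=
  (⨆ b, ∫ x, x ∂(ν b)) - ∫ x, x ∂(ν a)
```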
      theorem Bandits.gap_nonneg {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} {a : α} [Fintype α] :
      0 ≤ gap ν a
      noncomputable def Bandits.pullCount {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) :
      ℕ
      Number of times arm a was pulled up to time t (excluding t).

      Equations
      Instances For
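A computable sketch of the same counting idea (the library's `pullCount` is noncomputable); reading the pulled arm off the history as `(h s).1` is an assumption made for illustration.

```lean
import Mathlib

/-- Count the rounds `s < t` in which arm `a` was pulled, reading the pulled arm
off the history as `(h s).1`. For example, if the first four pulled arms are
`0, 1, 0, 0`, then the count for arm `0` at `t = 4` is `3`. -/
def pullCountSketch {α : Type*} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℕ :=
  ((Finset.range t).filter fun s => (h s).1 = a).card
```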
        @[simp]
        theorem Bandits.pullCount_zero {α : Type u_1} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
        pullCount a 0 h = 0
        theorem Bandits.pullCount_one {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {a : α} :
        pullCount a 1 h = if arm 0 h = a then 1 else 0
        theorem Bandits.monotone_pullCount {α : Type u_1} [DecidableEq α] (a : α) (h : ℕ → α × ℝ) :
        Monotone fun (x : ℕ) => pullCount a x h
        theorem Bandits.pullCount_eq_pullCount_add_one {α : Type u_1} [DecidableEq α] (t : ℕ) (h : ℕ → α × ℝ) :
        pullCount (arm t h) (t + 1) h = pullCount (arm t h) t h + 1
        theorem Bandits.pullCount_eq_pullCount {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {t : ℕ} {a : α} (ha : arm t h ≠ a) :
        pullCount a (t + 1) h = pullCount a t h
        theorem Bandits.pullCount_add_one {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {t : ℕ} {a : α} :
        pullCount a (t + 1) h = pullCount a t h + if arm t h = a then 1 else 0
        theorem Bandits.pullCount_eq_sum {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) :
        pullCount a t h = ∑ s ∈ Finset.range t, if arm s h = a then 1 else 0
        theorem Bandits.pullCount_le {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) :
        pullCount a t h ≤ t
        noncomputable def Bandits.stepsUntil {α : Type u_1} [DecidableEq α] (a : α) (m : ℕ) (h : ℕ → α × ℝ) :
        ℕ∞
        Number of steps until arm a has been pulled exactly m times, as an element of ℕ∞: the value is ⊤ if arm a is never pulled m times.

        Equations
        Instances For
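A hedged sketch of the "first hitting time" reading of this definition, mirroring the `stepsUntil_eq_dite` characterization below; the helper and its exact form are assumptions for illustration.

```lean
import Mathlib

open Classical in
/-- Least `s` such that arm `a` has been pulled `m` times after round `s`
(i.e. the pull count over the first `s + 1` rounds equals `m`), and `⊤` if arm
`a` is never pulled `m` times. -/
noncomputable def stepsUntilSketch {α : Type*} [DecidableEq α]
    (a : α) (m : ℕ) (h : ℕ → α × ℝ) : ℕ∞ :=
  let pulls (t : ℕ) : ℕ := ((Finset.range t).filter fun s => (h s).1 = a).card
  if hex : ∃ s, pulls (s + 1) = m then (Nat.find hex : ℕ∞) else ⊤
```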
          theorem Bandits.stepsUntil_eq_top_iff {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m : ℕ} {a : α} :
          stepsUntil a m h = ⊤ ↔ ∀ (s : ℕ), pullCount a (s + 1) h ≠ m
          theorem Bandits.stepsUntil_zero_of_ne {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h ≠ a) :
          stepsUntil a 0 h = 0
          theorem Bandits.stepsUntil_zero_of_eq {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
          stepsUntil a 0 h = ⊤
          theorem Bandits.stepsUntil_eq_dite {α : Type u_1} [DecidableEq α] (a : α) (m : ℕ) (h : ℕ → α × ℝ) [Decidable (∃ (s : ℕ), pullCount a (s + 1) h = m)] :
          stepsUntil a m h = if h_1 : ∃ (s : ℕ), pullCount a (s + 1) h = m then ↑(Nat.find h_1) else ⊤
          theorem Bandits.stepsUntil_pullCount_le {α : Type u_1} [DecidableEq α] (h : ℕ → α × ℝ) (a : α) (t : ℕ) :
          stepsUntil a (pullCount a (t + 1) h) h ≤ ↑t
          theorem Bandits.stepsUntil_pullCount_eq {α : Type u_1} [DecidableEq α] (h : ℕ → α × ℝ) (t : ℕ) :
          stepsUntil (arm t h) (pullCount (arm t h) (t + 1) h) h = ↑t
          theorem Bandits.stepsUntil_one_of_eq {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {a : α} (hka : arm 0 h = a) :
          stepsUntil a 1 h = 0

          If we pull arm a at time 0, the first time at which it is pulled once is 0.

          theorem Bandits.stepsUntil_eq_zero_iff {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m : ℕ} {a : α} :
          stepsUntil a m h = 0 ↔ m = 0 ∧ arm 0 h ≠ a ∨ m = 1 ∧ arm 0 h = a
          theorem Bandits.arm_stepsUntil {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m : ℕ} {a : α} (hm : m ≠ 0) (h_exists : ∃ (s : ℕ), pullCount a (s + 1) h = m) :
          arm (stepsUntil a m h).toNat h = a
          theorem Bandits.arm_eq_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α] {m n : ℕ} {a : α} {ω : ℕ → α × ℝ} (hm : m ≠ 0) (h : stepsUntil a m ω = ↑n) :
          arm n ω = a
          theorem Bandits.stepsUntil_eq_congr {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m n : ℕ} {a : α} {h' : ℕ → α × ℝ} (h_eq : ∀ i ≤ n, arm i h = arm i h') :
          stepsUntil a m h = ↑n ↔ stepsUntil a m h' = ↑n
          theorem Bandits.pullCount_stepsUntil_add_one {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m : ℕ} {a : α} (h_exists : ∃ (s : ℕ), pullCount a (s + 1) h = m) :
          pullCount a (stepsUntil a m h + 1).toNat h = m
          theorem Bandits.pullCount_stepsUntil {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {m : ℕ} {a : α} (hm : m ≠ 0) (h_exists : ∃ (s : ℕ), pullCount a (s + 1) h = m) :
          pullCount a (stepsUntil a m h).toNat h = m - 1
          def Bandits.sumRewards {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) :
          ℝ
          Sum of rewards obtained when pulling arm a up to time t (exclusive).

          Equations
          Instances For
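A sketch of the sum being described, under the assumption that the reward of round `s` is read off the history as `(h s).2`; not the library's generated equation.

```lean
import Mathlib

/-- Total reward collected from arm `a` during the rounds `s < t`. -/
noncomputable def sumRewardsSketch {α : Type*} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ :=
  ∑ s ∈ Finset.range t, if (h s).1 = a then (h s).2 else 0
```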
            noncomputable def Bandits.empMean {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) :
            ℝ
            Empirical mean reward obtained when pulling arm a up to time t (exclusive).

            Equations
            Instances For
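Under the same assumptions, a sketch of the empirical mean as total reward divided by pull count; with Mathlib's convention `x / 0 = 0`, this value is `0` before arm `a` has been pulled.

```lean
import Mathlib

/-- Empirical mean of arm `a` over the rounds `s < t`: total reward divided by
the number of pulls (and `0` when `a` has not been pulled yet). -/
noncomputable def empMeanSketch {α : Type*} [DecidableEq α]
    (a : α) (t : ℕ) (h : ℕ → α × ℝ) : ℝ :=
  (∑ s ∈ Finset.range t, if (h s).1 = a then (h s).2 else 0) /
    (((Finset.range t).filter fun s => (h s).1 = a).card : ℝ)
```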
              theorem Bandits.sumRewards_eq_pullCount_mul_empMean {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {t : ℕ} {a : α} (h_pull : pullCount a t h ≠ 0) :
              sumRewards a t h = ↑(pullCount a t h) * empMean a t h
              noncomputable def Bandits.rewardByCount {α : Type u_1} [DecidableEq α] (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
              ℝ
              Reward obtained when pulling arm a for the m-th time. If arm a is never pulled an m-th time, the fallback value z m a is used instead (see rewardByCount_eq_ite).

              Equations
              Instances For
                theorem Bandits.rewardByCount_eq_ite {α : Type u_1} [DecidableEq α] (a : α) (m : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
                rewardByCount a m h z = if stepsUntil a m h = ⊤ then z m a else reward (stepsUntil a m h).toNat h
                theorem Bandits.rewardByCount_of_stepsUntil_eq_top {α : Type u_1} [DecidableEq α] {m : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ⊤) :
                rewardByCount a m ω.1 ω.2 = ω.2 m a
                theorem Bandits.rewardByCount_of_stepsUntil_eq_coe {α : Type u_1} [DecidableEq α] {m n : ℕ} {a : α} {ω : (ℕ → α × ℝ) × (ℕ → α → ℝ)} (h : stepsUntil a m ω.1 = ↑n) :
                rewardByCount a m ω.1 ω.2 = reward n ω.1
                theorem Bandits.rewardByCount_pullCount_add_one_eq_reward {α : Type u_1} [DecidableEq α] (t : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
                rewardByCount (arm t h) (pullCount (arm t h) t h + 1) h z = reward t h
                theorem Bandits.sum_rewardByCount_eq_sumRewards {α : Type u_1} [DecidableEq α] (a : α) (t : ℕ) (h : ℕ → α × ℝ) (z : ℕ → α → ℝ) :
                ∑ m ∈ Finset.Icc 1 (pullCount a t h), rewardByCount a m h z = sumRewards a t h
                theorem Bandits.sum_pullCount_mul {α : Type u_1} [DecidableEq α] [Fintype α] (h : ℕ → α × ℝ) (f : α → ℝ) (t : ℕ) :
                ∑ a : α, ↑(pullCount a t h) * f a = ∑ s ∈ Finset.range t, f (arm s h)
                theorem Bandits.sum_pullCount {α : Type u_1} [DecidableEq α] {h : ℕ → α × ℝ} {t : ℕ} [Fintype α] :
                ∑ a : α, pullCount a t h = t
                theorem Bandits.regret_eq_sum_pullCount_mul_gap {α : Type u_1} [DecidableEq α] {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} {h : ℕ → α × ℝ} {t : ℕ} [Fintype α] :
                regret ν t h = ∑ a : α, ↑(pullCount a t h) * gap ν a
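This is the standard regret decomposition. Writing μ_a for the mean of `ν a`, Δ_a for `gap ν a`, A_s for `arm s h` and N_a(t) for `pullCount a t h`, the computation (a sketch, assuming the regret is the cumulative sum of per-round gaps) regroups the per-round gaps by arm, exactly as in `sum_pullCount_mul` applied with `f = gap ν`:

$$\mathrm{regret}(\nu, t, h) \;=\; \sum_{s < t} \Bigl(\max_b \mu_b - \mu_{A_s}\Bigr) \;=\; \sum_{s < t} \Delta_{A_s} \;=\; \sum_{a} N_a(t)\, \Delta_a .$$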
                noncomputable def Bandits.bestArm {α : Type u_1} {mα : MeasurableSpace α} [Fintype α] [Nonempty α] (ν : ProbabilityTheory.Kernel α ℝ) :
                α

                Arm with the highest mean.

                Equations
                Instances For
                  theorem Bandits.le_bestArm {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [Fintype α] [Nonempty α] (a : α) :
                  ∫ (x : ℝ), id x ∂ν a ≤ ∫ (x : ℝ), id x ∂ν (bestArm ν)
                  theorem Bandits.gap_eq_bestArm_sub {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} {a : α} [Fintype α] [Nonempty α] :
                  gap ν a = ∫ (x : ℝ), id x ∂ν (bestArm ν) - ∫ (x : ℝ), id x ∂ν a
                  @[simp]
                  theorem Bandits.gap_bestArm {α : Type u_1} {mα : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α ℝ} [Fintype α] [Nonempty α] :
                  gap ν (bestArm ν) = 0