Documentation

LeanBandits.UCB

UCB algorithm #

noncomputable def Bandits.ucbWidth {α : Type u_1} (c : ) (N : α) (t : ) (a : α) :
Equations
Instances For
    noncomputable def Bandits.ucbArm {α : Type u_1} [Fintype α] [Nonempty α] (c : ) (μ : α) (N : α) (t : ) :
    α
    Equations
    Instances For
      theorem Bandits.le_ucb {α : Type u_1} {t : } [Fintype α] [Nonempty α] {c : } {μ : α} {N : α} (a : α) :
      μ a + ucbWidth c N t a μ (ucbArm c μ N t) + ucbWidth c N t (ucbArm c μ N t)
      theorem Bandits.gap_ucbArm_le_two_mul_ucbWidth {α : Type u_1} { : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α } {t : } [Fintype α] [Nonempty α] {c : } {μ : α} {N : α} (h_best : (x : ), id x ν (bestArm ν) μ (bestArm ν) + ucbWidth c N t (bestArm ν)) (h_ucb : μ (ucbArm c μ N t) - ucbWidth c N t (ucbArm c μ N t) (x : ), id x ν (ucbArm c μ N t)) :
      gap ν (ucbArm c μ N t) 2 * ucbWidth c N t (ucbArm c μ N t)
      theorem Bandits.N_ucbArm_le {α : Type u_1} { : MeasurableSpace α} {ν : ProbabilityTheory.Kernel α } {t : } [Fintype α] [Nonempty α] {c : } {μ : α} {N : α} (h_best : (x : ), id x ν (bestArm ν) μ (bestArm ν) + ucbWidth c N t (bestArm ν)) (h_ucb : μ (ucbArm c μ N t) - ucbWidth c N t (ucbArm c μ N t) (x : ), id x ν (ucbArm c μ N t)) :
      (N (ucbArm c μ N t)) 4 * c * Real.log t / gap ν (ucbArm c μ N t) ^ 2