Tools to build bandit algorithms #
theorem
measurable_encode
{α : Type u_1}
{x✝ : MeasurableSpace α}
[Encodable α]
[MeasurableSingletonClass α]
:
theorem
measurableEmbedding_encode
(α : Type u_1)
{x✝ : MeasurableSpace α}
[Encodable α]
[MeasurableSingletonClass α]
:
theorem
measurableSet_isMax
{𝓧 : Type u_1}
{𝓨 : Type u_2}
{α : Type u_3}
{m𝓧 : MeasurableSpace 𝓧}
{mα : MeasurableSpace α}
[TopologicalSpace α]
[LinearOrder α]
[OpensMeasurableSpace α]
[OrderClosedTopology α]
[SecondCountableTopology α]
[Countable 𝓨]
{f : 𝓧 → 𝓨 → α}
(hf : ∀ (y : 𝓨), Measurable fun (x : 𝓧) => f x y)
(y : 𝓨)
:
theorem
exists_isMaxOn'
{𝓧 : Type u_1}
{𝓨 : Type u_2}
{α : Type u_4}
[LinearOrder α]
[Nonempty 𝓨]
[Finite 𝓨]
[Encodable 𝓨]
(f : 𝓧 → 𝓨 → α)
(x : 𝓧)
:
∃ (n : ℕ) (y : 𝓨), n = Encodable.encode y ∧ ∀ (z : 𝓨), f x z ≤ f x y
noncomputable def
measurableArgmax
{𝓧 : Type u_1}
{𝓨 : Type u_2}
{α : Type u_3}
{m𝓨 : MeasurableSpace 𝓨}
[LinearOrder α]
[Nonempty 𝓨]
[Finite 𝓨]
[Encodable 𝓨]
[MeasurableSingletonClass 𝓨]
(f : 𝓧 → 𝓨 → α)
[(x : 𝓧) → DecidablePred fun (n : ℕ) => ∃ (y : 𝓨), n = Encodable.encode y ∧ ∀ (z : 𝓨), f x z ≤ f x y]
(x : 𝓧)
:
𝓨
A measurable argmax function.
Equations
- measurableArgmax f x = ⋯.invFun (Nat.find ⋯)
Instances For
theorem
measurable_measurableArgmax
{𝓧 : Type u_1}
{𝓨 : Type u_2}
{α : Type u_3}
{m𝓧 : MeasurableSpace 𝓧}
{m𝓨 : MeasurableSpace 𝓨}
{mα : MeasurableSpace α}
[TopologicalSpace α]
[LinearOrder α]
[OpensMeasurableSpace α]
[OrderClosedTopology α]
[SecondCountableTopology α]
[Nonempty 𝓨]
[Finite 𝓨]
[Encodable 𝓨]
[MeasurableSingletonClass 𝓨]
{f : 𝓧 → 𝓨 → α}
[(x : 𝓧) → DecidablePred fun (n : ℕ) => ∃ (y : 𝓨), n = Encodable.encode y ∧ ∀ (z : 𝓨), f x z ≤ f x y]
(hf : ∀ (y : 𝓨), Measurable fun (x : 𝓧) => f x y)
:
theorem
isMaxOn_measurableArgmax
{𝓧 : Type u_1}
{𝓨 : Type u_2}
{m𝓨 : MeasurableSpace 𝓨}
{α : Type u_4}
[LinearOrder α]
[Nonempty 𝓨]
[Finite 𝓨]
[Encodable 𝓨]
[MeasurableSingletonClass 𝓨]
(f : 𝓧 → 𝓨 → α)
[(x : 𝓧) → DecidablePred fun (n : ℕ) => ∃ (y : 𝓨), n = Encodable.encode y ∧ ∀ (z : 𝓨), f x z ≤ f x y]
(x : 𝓧)
(z : 𝓨)
:
noncomputable def
Bandits.sumRewards'
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : { x : ℕ // x ∈ Finset.Iic n } → α × ℝ)
(a : α)
:
Sum of rewards of arm a
up to (and including) time n
.
Equations
Instances For
noncomputable def
Bandits.empMean'
{α : Type u_1}
[DecidableEq α]
(n : ℕ)
(h : { x : ℕ // x ∈ Finset.Iic n } → α × ℝ)
(a : α)
:
Empirical mean of arm a
at time n
.
Equations
- Bandits.empMean' n h a = Bandits.sumRewards' n h a / ↑(Bandits.pullCount' n h a)
Instances For
theorem
Bandits.measurable_pullCount'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
:
Measurable fun (h : { x : ℕ // x ∈ Finset.Iic n } → α × ℝ) => pullCount' n h a
theorem
Bandits.measurable_sumRewards'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
:
Measurable fun (h : { x : ℕ // x ∈ Finset.Iic n } → α × ℝ) => sumRewards' n h a
theorem
Bandits.measurable_empMean'
{α : Type u_1}
[DecidableEq α]
[MeasurableSpace α]
[MeasurableSingletonClass α]
(n : ℕ)
(a : α)
: