Skip to content

Commit

Permalink
re-add original nesterov scheme as ConstantStepNesterovAccelerator fo…
Browse files Browse the repository at this point in the history
…r comparisons
  • Loading branch information
odunbar committed Oct 28, 2023
1 parent 7ac2e4d commit ce0d3d5
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 10 deletions.
92 changes: 91 additions & 1 deletion src/Accelerators.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# included in EnsembleKalmanProcess.jl

export DefaultAccelerator, NesterovAccelerator
export DefaultAccelerator, NesterovAccelerator, ConstantStepNesterovAccelerator
export accelerate!, set_initial_acceleration!

"""
Expand All @@ -10,6 +10,34 @@ Default accelerator provides no acceleration, runs traditional EKI
"""
struct DefaultAccelerator <: Accelerator end



"""
$(TYPEDEF)
Accelerator that adapts a Constant-timestep version of Nesterov's momentum method for EKI.
Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value")
$(TYPEDFIELDS)
"""
mutable struct ConstantStepNesterovAccelerator{FT <: AbstractFloat} <: Accelerator
r::FT
u_prev::Any
end

function ConstantStepNesterovAccelerator(r = 3.0, initial = Float64[])
return ConstantStepNesterovAccelerator(r, initial)
end


"""
Sets u_prev to the initial parameter values
"""
function set_ICs!(accelerator::ConstantStepNesterovAccelerator{FT}, u::MA) where {FT <: AbstractFloat, MA <: AbstractMatrix}
accelerator.u_prev = u
end


"""
$(TYPEDEF)
Expand Down Expand Up @@ -124,3 +152,65 @@ function accelerate!(
uki.process.uu_cov[end] = uu_cov # N_ens x N_data

end




"""
Performs state update with modified constant timestep Nesterov momentum approach.
"""
function accelerate!(
ekp::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::MA,
) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix}
## update "v" state:
k = get_N_iterations(ekp) + 2
v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev)

## update "u" state:
ekp.accelerator.u_prev = u

## push "v" state to EKP object
push!(ekp.u, DataContainer(v, data_are_columns = true))
end


"""
State update method for UKI with constant timestep Nesterov Accelerator.
Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving.
"""
function accelerate!(
uki::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::AM,
) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix}

#identical update stage as before
## update "v" state:
k = get_N_iterations(uki) + 2
v = u .+ (1 - uki.accelerator.r / k) * (u .- uki.accelerator.u_prev)

## update "u" state:
uki.accelerator.u_prev = u

## push "v" state to UKI object
push!(uki.u, DataContainer(v, data_are_columns = true))

# additional complication: the stored "u_mean" and "uu_cov" are not the mean/cov of this ensemble
# the ensemble comes from the prediction operation acted upon this. we invert the prediction of the mean/cov of the sigma ensemble from u_mean/uu_cov
# u_mean = 1/alpha*(mean(v) - r) + r
# uu_cov = 1/alpha^2*(cov(v) - Σ_ω)
α_reg = uki.process.α_reg
r = uki.process.r
Σ_ω = uki.process.Σ_ω
Δt = uki.Δt[end]

v_mean = construct_mean(uki, v)
vv_cov = construct_cov(uki, v, v_mean)
u_mean = 1 / α_reg * (v_mean - r) + r
uu_cov = (1 / α_reg)^2 * (vv_cov - Σ_ω * Δt)

# overwrite the saved u_mean/uu_cov
uki.process.u_mean[end] = u_mean # N_ens x N_params
uki.process.uu_cov[end] = uu_cov # N_ens x N_data

end
2 changes: 1 addition & 1 deletion src/EnsembleKalmanProcess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ function EnsembleKalmanProcess(
end
AC = typeof(acc)

if AC <: NesterovAccelerator
if !(AC <: DefaultAccelerator)
set_ICs!(acc, params)
if P <: Sampler
@warn "Acceleration is experimental for Sampler processes and may affect convergence."
Expand Down
29 changes: 21 additions & 8 deletions test/EnsembleKalmanProcess/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ end
# build accelerated and non-accelerated processes
ekiobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = NesterovAccelerator())
eksobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = NesterovAccelerator())
ekiobj_const = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = ConstantStepNesterovAccelerator())
eksobj_const = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = ConstantStepNesterovAccelerator())
ekiobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion())
eksobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior))
ekiobj_noacc_specified =
Expand All @@ -101,6 +103,8 @@ end
## test EKP object's accelerator type is consistent (EKP constructor reassigns object in some cases)
@test typeof(ekiobj.accelerator) <: NesterovAccelerator
@test typeof(eksobj.accelerator) <: NesterovAccelerator
@test typeof(ekiobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(eksobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(ekiobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(eksobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(ekiobj_noacc_specified.accelerator) <: DefaultAccelerator
Expand All @@ -112,27 +116,36 @@ end
@test eksobj.accelerator.u_prev == initial_ensemble
@test eksobj.accelerator.θ_prev == 1.0

@test ekiobj_const.accelerator.r 3.0
@test ekiobj_const.accelerator.u_prev == initial_ensemble
@test eksobj_const.accelerator.r 3.0
@test eksobj_const.accelerator.u_prev == initial_ensemble

## test method convergence
# Note: this test only requires that the final ensemble is an improvement on the initial ensemble,
# NOT that the accelerated processes are more effective than the default, as this is not guaranteed.
# Specific cost values are printed to give an idea of acceleration.
processes = [
repeat([
Inversion(),
TransformInversion(inv(Γy)),
Unscented(prior; impose_prior = true),
Inversion(),
TransformInversion(inv(Γy)),
Unscented(prior; impose_prior = true),
],2)...,
Sampler(prior),
]
schedulers = [
repeat([DefaultScheduler(0.1)], 3)...,
repeat([DataMisfitController(terminate_at = 100)], 3)...,
EKSStableScheduler(),
repeat([DefaultScheduler(0.1)], 3)..., # for constant timestep Nesterov
repeat([DataMisfitController(terminate_at = 100)], 3)..., # for general Nesterov
EKSStableScheduler(), # for general Nesterov
]
for (process, scheduler) in zip(processes, schedulers)
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [20, 20]
if typeof(scheduler) <: DefaultScheduler
accelerators = [DefaultAccelerator(), ConstantStepNesterovAccelerator(), NesterovAccelerator()]
N_iters = [20, 20, 20]
else #don't test the constantstep accelerator with variable timesteppers
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [20, 20]
end
init_means = []
final_means = []

Expand Down

0 comments on commit ce0d3d5

Please sign in to comment.