Skip to content

Commit

Permalink
modified accelerator for variable timestep
Browse files Browse the repository at this point in the history
simplify expression

format

rm typo in UKI that causes warning

needs a sqrt

remove r

add ref into docstring

re-add original nesterov scheme as ConstantStepNesterovAccelerator for comparisons

format

github actions takes ages, processing bug I expect, increasing timeout
  • Loading branch information
odunbar committed Oct 30, 2023
1 parent 02d8ad6 commit 385a04c
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 21 deletions.
2 changes: 1 addition & 1 deletion bors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ status = [
'format',
]
delete_merged_branches = true
timeout_sec = 3600
timeout_sec = 7200
block_labels = [ "do-not-merge-yet" ]
127 changes: 120 additions & 7 deletions src/Accelerators.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# included in EnsembleKalmanProcess.jl

export DefaultAccelerator, NesterovAccelerator
export DefaultAccelerator, NesterovAccelerator, ConstantStepNesterovAccelerator
export accelerate!, set_initial_acceleration!

"""
Expand All @@ -10,21 +10,52 @@ Default accelerator provides no acceleration, runs traditional EKI
"""
struct DefaultAccelerator <: Accelerator end



"""
$(TYPEDEF)
Accelerator that adapts Nesterov's momentum method for EKI.
Accelerator that adapts a Constant-timestep version of Nesterov's momentum method for EKI.
Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value")
$(TYPEDFIELDS)
"""
mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator
mutable struct ConstantStepNesterovAccelerator{FT <: AbstractFloat} <: Accelerator
r::FT
u_prev::Any
end

function NesterovAccelerator(r = 3.0, initial = Float64[])
return NesterovAccelerator(r, initial)
function ConstantStepNesterovAccelerator(r = 3.0, initial = Float64[])
return ConstantStepNesterovAccelerator(r, initial)
end


"""
Sets u_prev to the initial parameter values
"""
function set_ICs!(
accelerator::ConstantStepNesterovAccelerator{FT},
u::MA,
) where {FT <: AbstractFloat, MA <: AbstractMatrix}
accelerator.u_prev = u
end


"""
$(TYPEDEF)
Accelerator that adapts Nesterov's momentum method for EKI.
Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value")
$(TYPEDFIELDS)
"""
mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator
u_prev::Array{FT}
θ_prev::FT
end

function NesterovAccelerator(initial = Float64[])
return NesterovAccelerator(initial, 1.0)
end


Expand All @@ -48,17 +79,27 @@ end

"""
Performs state update with modified Nesterov momentum approach.
The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf"
"""
function accelerate!(
ekp::EnsembleKalmanProcess{FT, IT, P, LRS, NesterovAccelerator{FT}},
u::MA,
) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix}
## update "v" state:
k = get_N_iterations(ekp) + 2
v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev)
#v = u .+ 2 / (get_N_iterations(ekp) + 2) * (u .- ekp.accelerator.u_prev)
Δt_prev = length(ekp.Δt) == 1 ? 1 : ekp.Δt[end - 1]
Δt = ekp.Δt[end]
θ_prev = ekp.accelerator.θ_prev

# condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2
a = sqrt(θ_prev^2 * Δt / Δt_prev)
θ = (-a + sqrt(a^2 + 4)) / 2

v = u .+ θ * (1 / θ_prev - 1) * (u .- ekp.accelerator.u_prev)

## update "u" state:
ekp.accelerator.u_prev = u
ekp.accelerator.θ_prev = θ

## push "v" state to EKP object
push!(ekp.u, DataContainer(v, data_are_columns = true))
Expand All @@ -67,13 +108,85 @@ end

"""
State update method for UKI with Nesterov Accelerator.
The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf"
Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving.
"""
function accelerate!(
uki::EnsembleKalmanProcess{FT, IT, P, LRS, NesterovAccelerator{FT}},
u::AM,
) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix}

#identical update stage as before
## update "v" state:
Δt_prev = length(uki.Δt) == 1 ? 1 : uki.Δt[end - 1]
Δt = uki.Δt[end]
θ_prev = uki.accelerator.θ_prev


# condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2
a = sqrt(θ_prev^2 * Δt / Δt_prev)
θ = (-a + sqrt(a^2 + 4)) / 2

v = u .+ θ * (1 / θ_prev - 1) * (u .- uki.accelerator.u_prev)

## update "u" state:
uki.accelerator.u_prev = u
uki.accelerator.θ_prev = θ

## push "v" state to UKI object
push!(uki.u, DataContainer(v, data_are_columns = true))

# additional complication: the stored "u_mean" and "uu_cov" are not the mean/cov of this ensemble
# the ensemble comes from the prediction operation acted upon this. we invert the prediction of the mean/cov of the sigma ensemble from u_mean/uu_cov
# u_mean = 1/alpha*(mean(v) - r) + r
# uu_cov = 1/alpha^2*(cov(v) - Σ_ω)
α_reg = uki.process.α_reg
r = uki.process.r
Σ_ω = uki.process.Σ_ω
Δt = uki.Δt[end]

v_mean = construct_mean(uki, v)
vv_cov = construct_cov(uki, v, v_mean)
u_mean = 1 / α_reg * (v_mean - r) + r
uu_cov = (1 / α_reg)^2 * (vv_cov - Σ_ω * Δt)

# overwrite the saved u_mean/uu_cov
uki.process.u_mean[end] = u_mean # N_ens x N_params
uki.process.uu_cov[end] = uu_cov # N_ens x N_data

end




"""
Performs state update with modified constant timestep Nesterov momentum approach.
"""
function accelerate!(
ekp::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::MA,
) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix}
## update "v" state:
k = get_N_iterations(ekp) + 2
v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev)

## update "u" state:
ekp.accelerator.u_prev = u

## push "v" state to EKP object
push!(ekp.u, DataContainer(v, data_are_columns = true))
end


"""
State update method for UKI with constant timestep Nesterov Accelerator.
Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving.
"""
function accelerate!(
uki::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::AM,
) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix}

#identical update stage as before
## update "v" state:
k = get_N_iterations(uki) + 2
Expand Down
2 changes: 1 addition & 1 deletion src/EnsembleKalmanProcess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ function EnsembleKalmanProcess(
end
AC = typeof(acc)

if AC <: NesterovAccelerator
if !(AC <: DefaultAccelerator)
set_ICs!(acc, params)
if P <: Sampler
@warn "Acceleration is experimental for Sampler processes and may affect convergence."
Expand Down
5 changes: 1 addition & 4 deletions src/UnscentedKalmanInversion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,7 @@ end
UKI prediction step : generate sigma points.
"""
function update_ensemble_prediction!(
process::Unscented,
Δt::FT,
) where {FT <: AbstractFloat, AV <: AbstractVector, AM <: AbstractMatrix}
function update_ensemble_prediction!(process::Unscented, Δt::FT) where {FT <: AbstractFloat}

process.iter += 1
# update evolution covariance matrix
Expand Down
50 changes: 42 additions & 8 deletions test/EnsembleKalmanProcess/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,29 @@ end
@testset "Accelerators" begin
# Get an inverse problem
y_obs, G, Γy, _ = inv_problems[end - 2] # additive noise inv problem (deterministic map)
inv_sqrt_Γy = sqrt(inv(Γy))

rng = Random.MersenneTwister(rng_seed)
N_ens_tmp = 5
initial_ensemble = EKP.construct_initial_ensemble(rng, prior, N_ens_tmp)

# build accelerated and non-accelerated processes
ekiobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = NesterovAccelerator())
eksobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = NesterovAccelerator())
ekiobj_const = EKP.EnsembleKalmanProcess(
initial_ensemble,
y_obs,
Γy,
Inversion(),
accelerator = ConstantStepNesterovAccelerator(),
)
eksobj_const = EKP.EnsembleKalmanProcess(
initial_ensemble,
y_obs,
Γy,
Sampler(prior),
accelerator = ConstantStepNesterovAccelerator(),
)
ekiobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion())
eksobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior))
ekiobj_noacc_specified =
Expand All @@ -99,26 +115,45 @@ end
## test EKP object's accelerator type is consistent (EKP constructor reassigns object in some cases)
@test typeof(ekiobj.accelerator) <: NesterovAccelerator
@test typeof(eksobj.accelerator) <: NesterovAccelerator
@test typeof(ekiobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(eksobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(ekiobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(eksobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(ekiobj_noacc_specified.accelerator) <: DefaultAccelerator
@test typeof(eksobj_noacc_specified.accelerator) <: DefaultAccelerator

## test NesterovAccelerators satisfy desired ICs
@test ekiobj.accelerator.r 3.0
@test ekiobj.accelerator.u_prev == initial_ensemble
@test eksobj.accelerator.r 3.0
@test ekiobj.accelerator.θ_prev == 1.0
@test eksobj.accelerator.u_prev == initial_ensemble
@test eksobj.accelerator.θ_prev == 1.0

@test ekiobj_const.accelerator.r 3.0
@test ekiobj_const.accelerator.u_prev == initial_ensemble
@test eksobj_const.accelerator.r 3.0
@test eksobj_const.accelerator.u_prev == initial_ensemble

## test method convergence
# Note: this test only requires that the final ensemble is an improvement on the initial ensemble,
# NOT that the accelerated processes are more effective than the default, as this is not guaranteed.
# Specific cost values are printed to give an idea of acceleration.
processes = [Inversion(), TransformInversion(inv(Γy)), Unscented(prior; impose_prior = true), Sampler(prior)]
schedulers = [repeat([DefaultScheduler(0.1)], 3)..., EKSStableScheduler()]
processes = [
repeat([Inversion(), TransformInversion(inv(Γy)), Unscented(prior; impose_prior = true)], 2)...,
Sampler(prior),
]
schedulers = [
repeat([DefaultScheduler(0.1)], 3)..., # for constant timestep Nesterov
repeat([DataMisfitController(terminate_at = 100)], 3)..., # for general Nesterov
EKSStableScheduler(), # for general Nesterov
]
for (process, scheduler) in zip(processes, schedulers)
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [5, 5, 5, 5]
if typeof(scheduler) <: DefaultScheduler
accelerators = [DefaultAccelerator(), ConstantStepNesterovAccelerator(), NesterovAccelerator()]
N_iters = [20, 20, 20]
else #don't test the constantstep accelerator with variable timesteppers
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [20, 20]
end
init_means = []
final_means = []

Expand Down Expand Up @@ -160,9 +195,8 @@ end
push!(init_means, vec(mean(get_u_prior(ekpobj), dims = 2)))
push!(final_means, vec(mean(get_u_final(ekpobj), dims = 2)))

inv_sqrt_Γy = sqrt(inv(Γy))
cost_initial =
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, init_means[end]))))
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, initial_ensemble))))
cost_final =
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, final_means[end]))))
@info "Convergence:" cost_initial cost_final
Expand Down

0 comments on commit 385a04c

Please sign in to comment.