Skip to content

Commit

Permalink
add spread check and consistent plot tool
Browse files Browse the repository at this point in the history
plot consistently

rm verbose

removed the inappropriate localizations in EKP/runtest

notation

formatting

UKI tested on nonlinear problems too

format

using \alpha to get convergence with UKI

100 iterations

update UKI

update default hyperparameters

format and remove hard-code flag

new 3D output test

format and all test pass

add abs in nonlinear inverse problem test; update UKI default

format

docs note for UKI impose_prior setting

docs update

default prior to initial cov/mean and make constructor a little more flexible

tests for default prior to init

codecov

format...
  • Loading branch information
odunbar committed Jun 14, 2023
1 parent c746772 commit e001485
Show file tree
Hide file tree
Showing 6 changed files with 400 additions and 211 deletions.
4 changes: 3 additions & 1 deletion docs/src/unscented_kalman_inversion.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ The free parameters in the unscented Kalman inversion are ``\alpha, r, \Sigma_{\

* otherwise ``\Lambda = C_0``, this allows that the converged covariance matrix is a weighted average between the posterior covariance matrix with an uninformative prior and ``C_0``.

In short, users only need to change the ``\alpha`` (`α_reg`), and the frequency to update the ``\Lambda`` (`update_freq`). The user can first try `α_reg = 1.0` and `update_freq = 0`.
In short, users only need to change the ``\alpha`` (`α_reg`), and the frequency to update the ``\Lambda`` to the current covariance (`update_freq`). The user can first try `α_reg = 1.0` and `update_freq = 0` (corresponding to ``\Lambda = C_0``).

!!! note "Preventing ensemble divergence"
If UKI suffers divergence (for example when inverse problems are not well-posed), one can prevent it by using Tikhonov regularization (see [Huang, Schneider, Stuart, 2022](https://doi.org/10.1016/j.jcp.2022.111262)). It is used by setting the `impose_prior = true` flag. In this mode, the free parameters are fixed to `α_reg = 1.0`, `update_freq = 1`.

## Implementation

Expand Down
73 changes: 60 additions & 13 deletions src/UnscentedKalmanInversion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ $(TYPEDFIELDS)
u0_mean::AbstractVector{FT},
uu0_cov::AbstractMatrix{FT};
α_reg::FT = 1.0,
update_freq::IT = 1,
update_freq::IT = 0,
modified_unscented_transform::Bool = true,
prior_mean::Union{AbstractVector{FT}, Nothing} = nothing,
impose_prior::Bool = false,
prior_mean::Any,
prior_cov::Any,
sigma_points::String = symmetric
) where {FT <: AbstractFloat, IT <: Int}
Expand All @@ -38,7 +40,14 @@ Inputs:
uninformative prior.
- `modified_unscented_transform`: Modification of the UKI quadrature given
in Huang et al (2021).
- `impose_prior`: using augmented system (Tikhonov regularization with Kalman inversion in Chada
et al 2020 and Huang et al (2022)) to regularize the inverse problem, which also imposes prior
for posterior estimation. If impose_prior == true, prior mean and prior cov must be provided.
This is recommended to use, especially when the number of observations is smaller than the number
of parameters (ill-posed inverse problems). When this is used, other regularizations are turned off
automatically.
- `prior_mean`: Prior mean used for regularization.
- `prior_cov`: Prior cov used for regularization.
- `sigma_points`: String of sigma point type, it can be `symmetric` with `2N_par+1`
ensemble members or `simplex` with `N_par+2` ensemble members.
Expand Down Expand Up @@ -67,19 +76,48 @@ mutable struct Unscented{FT <: AbstractFloat, IT <: Int} <: Process
r::AbstractVector{FT}
"update frequency"
update_freq::IT
"using augmented system (Tikhonov regularization with Kalman inversion in Chada
et al 2020 and Huang et al (2022)) to regularize the inverse problem, which also imposes prior
for posterior estimation."
impose_prior::Bool
"prior mean - defaults to initial mean"
prior_mean::Any
"prior covariance - defaults to initial covariance"
prior_cov::Any
"current iteration number"
iter::IT
end

function Unscented(
u0_mean::AbstractVector{FT},
uu0_cov::AbstractMatrix{FT};
u0_mean::VV,
uu0_cov::MM;
α_reg::FT = 1.0,
update_freq::IT = 1,
update_freq::IT = 0,
modified_unscented_transform::Bool = true,
prior_mean::Union{AbstractVector{FT}, Nothing} = nothing,
impose_prior::Bool = false,
prior_mean::Any = nothing,
prior_cov::Any = nothing,
sigma_points::String = "symmetric",
) where {FT <: AbstractFloat, IT <: Int}
) where {FT <: AbstractFloat, IT <: Int, VV <: AbstractVector, MM <: AbstractMatrix}

u0_mean = FT.(u0_mean)
uu0_cov = FT.(uu0_cov)
if impose_prior
if isnothing(prior_mean)
@info "`impose_prior=true` but `prior_mean=nothing`, taking initial mean as prior mean."
prior_mean = u0_mean
else
prior_mean = FT.(prior_mean)
end
if isnothing(prior_cov)
@info "`impose_prior=true` but `prior_cov=nothing`, taking initial covariance as prior covariance"
prior_cov = uu0_cov
else
prior_cov = FT.(prior_cov)
end
α_reg = 1.0
update_freq = 1
end

if sigma_points == "symmetric"
N_ens = 2 * size(u0_mean, 1) + 1
Expand All @@ -89,7 +127,6 @@ function Unscented(
throw(ArgumentError("sigma_points type is not recognized. Select from \"symmetric\" or \"simplex\". "))
end


N_par = size(u0_mean, 1)
# ensemble size

Expand Down Expand Up @@ -165,6 +202,9 @@ function Unscented(
α_reg,
r,
update_freq,
impose_prior,
prior_mean,
prior_cov,
iter,
)
end
Expand All @@ -174,7 +214,7 @@ function Unscented(prior::ParameterDistribution; kwargs...)
u0_mean = Vector(mean(prior)) # mean of unconstrained distribution
uu0_cov = Matrix(cov(prior)) # cov of unconstrained distribution

return Unscented(u0_mean, uu0_cov; kwargs...)
return Unscented(u0_mean, uu0_cov; prior_mean = u0_mean, prior_cov = uu0_cov, kwargs...)

end

Expand Down Expand Up @@ -236,10 +276,17 @@ function FailureHandler(process::Unscented, method::SampleSuccGauss)
cov_localized = uki.localizer.localize(cov_est)
uu_p_cov, ug_cov, gg_cov = get_cov_blocks(cov_localized, size(u_p, 1))

tmp = ug_cov / gg_cov

u_mean = u_p_mean + tmp * (obs_mean - g_mean)
uu_cov = uu_p_cov - tmp * ug_cov'
if uki.process.impose_prior
ug_cov_reg = [ug_cov uu_p_cov]
gg_cov_reg = [gg_cov ug_cov'; ug_cov uu_p_cov+uki.process.prior_cov / uki.Δt[end]]
tmp = ug_cov_reg / gg_cov_reg
u_mean = u_p_mean + tmp * [obs_mean - g_mean; uki.process.prior_mean - u_p_mean]
uu_cov = uu_p_cov - tmp * ug_cov_reg'
else
tmp = ug_cov / gg_cov
u_mean = u_p_mean + tmp * (obs_mean - g_mean)
uu_cov = uu_p_cov - tmp * ug_cov'
end

########### Save results
push!(uki.process.obs_pred, g_mean) # N_ens x N_data
Expand Down
120 changes: 106 additions & 14 deletions test/EnsembleKalmanProcess/inverse_problem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,64 @@ function linear_map(rng, n_obs)
end

"""
G_nonlinear(ϕ, rng, noise_dist, ϕ_star = 0.0)
G_nonlinear_old(ϕ, rng, noise_dist, ϕ_star = 0.0)
Noisy nonlinear map output given `ϕ` and multivariate noise.
The map: `(||ϕ-ϕ_star^*||_1, ||ϕ-ϕ_star||_2, ||ϕ-ϕ_star||_∞)`
Inputs:
- `ϕ` :: Input to the forward map (constrained space)
- `rng` :: Random number generator
- `noise_dist` :: Multivariate noise distribution from which stochasticity of map is sampled.
- `ϕ_star` :: Minimum of the function
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
"""
function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
if ndims(ϕ) == 1
p = length(ϕ)
ϕ_new = reshape(ϕ, (p, 1))
else
ϕ_new = ϕ
end

true_output = zeros(3, size(ϕ_new, 2)) #l^1, l^2, l^inf norm
true_output[1, :] = reshape([norm(c, 1) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)
true_output[2, :] = reshape([norm(c, 2) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)
true_output[3, :] = reshape([maximum(abs.(c)) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)


# Add noise from noise_dist
if isnothing(add_or_mult_noise)
#just copy the observation n_obs//3 times
output = repeat(true_output, length(noise_dist), 1) # (n_obs x n_ens)
else
output = zeros(length(noise_dist) * size(true_output, 1), size(true_output, 2))
internal_noise = rand(rng, noise_dist, size(true_output, 2))

if add_or_mult_noise == "add"
for i in 1:length(noise_dist)
output[(3 * (i - 1) + 1):(3 * i), :] = reshape(internal_noise[i, :], 1, :) .+ true_output
end
elseif add_or_mult_noise == "mult"
for i in 1:length(noise_dist)
output[(3 * (i - 1) + 1):(3 * i), :] = (1 .+ reshape(internal_noise[i, :], 1, :)) .* true_output
end
else
throw(
ArgumentError(
"`add_or_mult_noise` keyword expects either nothing, \"add\" or \"mult\". Got $(add_or_mult_noise).",
),
)
end
end

return size(output, 2) == 1 ? vec(output) : output
end


"""
G_nonlinear_old(ϕ, rng, noise_dist, ϕ_star = 0.0)
Noisy nonlinear map output given `ϕ` and multivariate noise.
The map: √(sum(ϕ - ϕ_star)²)
Expand All @@ -39,19 +96,20 @@ Inputs:
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
"""
function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
function G_nonlinear_old(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
if ndims(ϕ) == 1
p = length(ϕ)
ϕ_new = reshape(ϕ, (p, 1))
else
ϕ_new = ϕ
end
true_output = sqrt.(sum((ϕ_new .- ϕ_star) .^ 2, dims = 1)) # (1 x n_ens)

# Add noise from noise_dist

if isnothing(add_or_mult_noise)
#just copy the observation n_obs times
output = ones(length(noise_dist), 1) * true_output # (n_obs x n_ens)
output = repeat(true_output, length(noise_dist), 1) # (n_obs x n_ens)
elseif add_or_mult_noise == "add"
output = rand(rng, noise_dist, size(true_output, 2)) .+ true_output # (n_obs x n_ens)
elseif add_or_mult_noise == "mult"
Expand All @@ -67,14 +125,14 @@ function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = not
end

"""
linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, return_matrix = false)
linear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, return_matrix = false)
Returns the objects defining a linear inverse problem.
Inputs:
- `ϕ_star` :: True parameter value (constrained space)
- `noise_level` :: Magnitude of the observational noise
- `noise_sd` :: Magnitude of the observational noise
- `n_obs` :: Dimension of the observational vector
- `rng` :: Random number generator
- `obs_corrmat` :: Correlation structure of the observational noise
Expand All @@ -87,12 +145,12 @@ Returns:
- `Γ` :: Observational covariance matrix
- `A` :: Forward model operator from constrained space
"""
function linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, return_matrix = false)
function linear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, return_matrix = false)
A = linear_map(rng, n_obs)
# Define forward map from unconstrained space directly
G(x) = G_linear(x, A)
y_star = G(ϕ_star)
Γ = noise_level^2 * obs_corrmat
Γ = noise_sd^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
y_obs = y_star .+ rand(rng, noise)
if return_matrix
Expand All @@ -103,33 +161,67 @@ function linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, r
end

"""
nonlinear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I; add_or_mult_noise = "add")
nonlinear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I; add_or_mult_noise = "add")
Returns the objects defining a random nonlinear inverse problem
Returns the objects defining a random nonlinear inverse problem
Inputs:
- `ϕ_star` :: True parameter value (constrained space)
- `noise_level` :: Magnitude of the observational noise
- `noise_sd` :: Magnitude of the observational noise
- `n_obs` :: Dimension of the observational vector
- `rng` :: Random number generator
- `obs_corrmat = I ` :: Correlation structure of the observational noise
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
Returns:
- `y_obs` :: Vector of observations
- `G` :: Map from constrained space to data space
- `Γ` :: Observational covariance matrix
"""
function nonlinear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_sd^2 * obs_corrmat
# G produces a 3D output so must div by 3
@assert n_obs % 3 == 0

function nonlinear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_level^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
internal_noise = MvNormal(zeros(Int(n_obs // 3)), noise_sd^2 * I)
# Define forward map from unconstrained space directly
G(x) = G_nonlinear(x, rng, noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
G(x) = G_nonlinear(x, rng, internal_noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
# Get observation
y_star = G(ϕ_star)
y_obs = y_star .+ rand(rng, noise)
return y_obs, G, Γ
end

function nonlinear_inv_problem_old(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_sd^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
# Define forward map from unconstrained space directly
G(x) = G_nonlinear_old(x, rng, noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
# Get observation
y_star = G(ϕ_star)
y_obs = y_star .+ rand(rng, noise)
return y_obs, G, Γ
end


function plot_inv_problem_ensemble(prior, ekp, filepath)
gr()
ϕ_prior = transform_unconstrained_to_constrained(prior, get_u_prior(ekp))
ϕ_final = get_ϕ_final(prior, ekp)
p = plot(ϕ_prior[1, :], ϕ_prior[2, :], seriestype = :scatter, label = "Initial ensemble")
plot!(ϕ_final[1, :], ϕ_final[2, :], seriestype = :scatter, label = "Final ensemble")

plot!(
[ϕ_star[1]],
xaxis = "cons_p",
yaxis = "uncons_p",
seriestype = "vline",
linestyle = :dash,
linecolor = :red,
label = :none,
)
plot!([ϕ_star[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = :none)
savefig(p, filepath)
end
Loading

0 comments on commit e001485

Please sign in to comment.