Merge #294
294: [WIP] Resolving some test-problem inconsistencies and apparent UKI instability r=odunbar a=odunbar

## Purpose 
Closes #287 

## Content
- More consistent plotting between tests
- Adjustments to the test problems
- Removed inappropriate localization tests from the `EnsembleKalmanProcess/runtests.jl` examples (they are already properly tested in `Localizers/runtests.jl`). These caused blow-up without disturbing the ensemble mean, so spread checks were also added to ensure that `tr(ensemble cov)` does not increase too much


- Test UKI with nonlinear problems, not just linear ones
- Added prior Tikhonov regularization for UKI and modified the test checks to account for the regularization (see the usage sketch after this list)
- Changed the default `update_freq` in UKI from 1 to 0, i.e., the prior covariance is used by default rather than the current covariance
- Minor type change in the UKI constructor so that input arrays are converted to the float type internally
- Added a note to the docs regarding this update
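
A minimal sketch of how the regularized UKI is now constructed (hypothetical prior and values; the keyword names and defaults are the ones introduced in `src/UnscentedKalmanInversion.jl` in this diff):

```julia
using EnsembleKalmanProcesses
using EnsembleKalmanProcesses.ParameterDistributions
using LinearAlgebra, Random

rng = Random.MersenneTwister(42)

# Hypothetical 2-parameter prior; the first component is bounded in [-2, 2],
# as in the example problem below.
prior = combine_distributions([
    constrained_gaussian("u1", 0.0, 1.0, -2.0, 2.0),
    constrained_gaussian("u2", 0.0, 1.0, -Inf, Inf),
])

# `impose_prior = true` switches on the prior (Tikhonov) regularization and
# internally fixes α_reg = 1.0 and update_freq = 1.
process = Unscented(prior; impose_prior = true, sigma_points = "symmetric")

# `y_obs` and `Γ` come from the user's inverse problem (not constructed here):
# ukiobj = EnsembleKalmanProcess(y_obs, Γ, process; rng = rng)
```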

----
- [ ] I have read and checked the items on the review checklist.

## Updated example
To reproduce: run `julia --project`, set `ENV["CES_TEST_PLOT_OUTPUT"] = true`, then run `test` from the package manager (`]`), and look for the figures `XYZ_test_4.png` in the test run directory (the steps are expanded in the sketch below).
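
Expanded, the steps are roughly as follows (a sketch; `Pkg.test()` is equivalent to `test` in Pkg mode):

```julia
# Start Julia with the project environment of the repository:
#   $ julia --project

ENV["CES_TEST_PLOT_OUTPUT"] = true   # enable figure output from the test suite

using Pkg
Pkg.test()                           # run the package tests, which write the test figures
```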

Problem: `x in R^2` and `y = (||x-x*||_1, ||x-x*||_2, ||x-x*||_infty) + N(0,0.01)`, with 100 iterations of each method.
The prior is bounded by `[-2,2]` along the x axis.
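
Written out, the forward map and noise are (reading `N(0, 0.01)` as i.i.d. Gaussian noise of variance 0.01 on each output component, which is an assumption about the shorthand):

```math
y = \bigl(\|x - x^\ast\|_1,\; \|x - x^\ast\|_2,\; \|x - x^\ast\|_\infty\bigr)^\top + \eta,
\qquad \eta \sim \mathcal{N}(0,\, 0.01\, I_3), \qquad x \in \mathbb{R}^2 .
```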

### EKI & EKS 

<img src="https://github.com/CliMA/EnsembleKalmanProcesses.jl/assets/47412152/ac03092e-b0b6-4fb6-8dd0-8ab000f5c2a8" width=300> <img src="https://github.com/CliMA/EnsembleKalmanProcesses.jl/assets/47412152/7a275262-a1ee-4bc8-a7cb-b71743387c96" width=300>

### Tikhonov UKI quad & simplex

<img src="https://github.com/CliMA/EnsembleKalmanProcesses.jl/assets/47412152/38017e26-45ac-4b5e-9dc3-15dbd548d040" width=300> <img src="https://github.com/CliMA/EnsembleKalmanProcesses.jl/assets/47412152/21748770-4aa4-47ba-b62f-c70aa57c03bc" width=300>


Co-authored-by: odunbar <[email protected]>
bors[bot] and odunbar committed Jun 14, 2023
2 parents deb0e8e + 2193f9b commit b18af99
Showing 6 changed files with 400 additions and 211 deletions.
4 changes: 3 additions & 1 deletion docs/src/unscented_kalman_inversion.md
@@ -80,8 +80,10 @@ The free parameters in the unscented Kalman inversion are ``\alpha, r, \Sigma_{\

* otherwise ``\Lambda = C_0``; this means that the converged covariance matrix is a weighted average between the posterior covariance matrix (with an uninformative prior) and ``C_0``.

In short, users only need to change the ``\alpha`` (`α_reg`), and the frequency to update the ``\Lambda`` (`update_freq`). The user can first try `α_reg = 1.0` and `update_freq = 0`.
In short, users only need to change the ``\alpha`` (`α_reg`), and the frequency to update the ``\Lambda`` to the current covariance (`update_freq`). The user can first try `α_reg = 1.0` and `update_freq = 0` (corresponding to ``\Lambda = C_0``).

!!! note "Preventing ensemble divergence"
If UKI suffers divergence (for example when inverse problems are not well-posed), one can prevent it by using Tikhonov regularization (see [Huang, Schneider, Stuart, 2022](https://doi.org/10.1016/j.jcp.2022.111262)). It is used by setting the `impose_prior = true` flag. In this mode, the free parameters are fixed to `α_reg = 1.0`, `update_freq = 1`.
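
A minimal construction sketch for this mode (hypothetical variables; the keyword is the one added to the `Unscented` constructor in `src/UnscentedKalmanInversion.jl` below):

```julia
# Assuming u0_mean::Vector and uu0_cov::Matrix are the initial mean and covariance;
# impose_prior = true internally fixes α_reg = 1.0 and update_freq = 1.
process = Unscented(u0_mean, uu0_cov; impose_prior = true, prior_mean = u0_mean, prior_cov = uu0_cov)
```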

## Implementation

73 changes: 60 additions & 13 deletions src/UnscentedKalmanInversion.jl
@@ -15,9 +15,11 @@ $(TYPEDFIELDS)
u0_mean::AbstractVector{FT},
uu0_cov::AbstractMatrix{FT};
α_reg::FT = 1.0,
update_freq::IT = 1,
update_freq::IT = 0,
modified_unscented_transform::Bool = true,
prior_mean::Union{AbstractVector{FT}, Nothing} = nothing,
impose_prior::Bool = false,
prior_mean::Any,
prior_cov::Any,
sigma_points::String = symmetric
) where {FT <: AbstractFloat, IT <: Int}
@@ -38,7 +40,14 @@ Inputs:
uninformative prior.
- `modified_unscented_transform`: Modification of the UKI quadrature given
in Huang et al (2021).
- `impose_prior`: use the augmented system (Tikhonov regularization with Kalman inversion, as in Chada
et al 2020 and Huang et al 2022) to regularize the inverse problem; this also imposes the prior
for posterior estimation. If `impose_prior == true`, the prior mean and covariance should be provided
(they default to the initial mean and covariance if omitted). This is recommended especially when the
number of observations is smaller than the number of parameters (ill-posed inverse problems). When this
is used, other regularizations are turned off automatically.
- `prior_mean`: Prior mean used for regularization.
- `prior_cov`: Prior cov used for regularization.
- `sigma_points`: String of sigma point type; it can be `symmetric` with `2N_par+1`
ensemble members or `simplex` with `N_par+2` ensemble members.
@@ -67,19 +76,48 @@ mutable struct Unscented{FT <: AbstractFloat, IT <: Int} <: Process
r::AbstractVector{FT}
"update frequency"
update_freq::IT
"using augmented system (Tikhonov regularization with Kalman inversion in Chada
et al 2020 and Huang et al (2022)) to regularize the inverse problem, which also imposes prior
for posterior estimation."
impose_prior::Bool
"prior mean - defaults to initial mean"
prior_mean::Any
"prior covariance - defaults to initial covariance"
prior_cov::Any
"current iteration number"
iter::IT
end

function Unscented(
u0_mean::AbstractVector{FT},
uu0_cov::AbstractMatrix{FT};
u0_mean::VV,
uu0_cov::MM;
α_reg::FT = 1.0,
update_freq::IT = 1,
update_freq::IT = 0,
modified_unscented_transform::Bool = true,
prior_mean::Union{AbstractVector{FT}, Nothing} = nothing,
impose_prior::Bool = false,
prior_mean::Any = nothing,
prior_cov::Any = nothing,
sigma_points::String = "symmetric",
) where {FT <: AbstractFloat, IT <: Int}
) where {FT <: AbstractFloat, IT <: Int, VV <: AbstractVector, MM <: AbstractMatrix}

u0_mean = FT.(u0_mean)
uu0_cov = FT.(uu0_cov)
if impose_prior
if isnothing(prior_mean)
@info "`impose_prior=true` but `prior_mean=nothing`, taking initial mean as prior mean."
prior_mean = u0_mean
else
prior_mean = FT.(prior_mean)
end
if isnothing(prior_cov)
@info "`impose_prior=true` but `prior_cov=nothing`, taking initial covariance as prior covariance"
prior_cov = uu0_cov
else
prior_cov = FT.(prior_cov)
end
α_reg = 1.0
update_freq = 1
end

if sigma_points == "symmetric"
N_ens = 2 * size(u0_mean, 1) + 1
@@ -89,7 +127,6 @@ function Unscented(
throw(ArgumentError("sigma_points type is not recognized. Select from \"symmetric\" or \"simplex\". "))
end


N_par = size(u0_mean, 1)
# ensemble size

@@ -165,6 +202,9 @@ function Unscented(
α_reg,
r,
update_freq,
impose_prior,
prior_mean,
prior_cov,
iter,
)
end
@@ -174,7 +214,7 @@ function Unscented(prior::ParameterDistribution; kwargs...)
u0_mean = Vector(mean(prior)) # mean of unconstrained distribution
uu0_cov = Matrix(cov(prior)) # cov of unconstrained distribution

return Unscented(u0_mean, uu0_cov; kwargs...)
return Unscented(u0_mean, uu0_cov; prior_mean = u0_mean, prior_cov = uu0_cov, kwargs...)

end

@@ -236,10 +276,17 @@ function FailureHandler(process::Unscented, method::SampleSuccGauss)
cov_localized = uki.localizer.localize(cov_est)
uu_p_cov, ug_cov, gg_cov = get_cov_blocks(cov_localized, size(u_p, 1))

tmp = ug_cov / gg_cov

u_mean = u_p_mean + tmp * (obs_mean - g_mean)
uu_cov = uu_p_cov - tmp * ug_cov'
if uki.process.impose_prior
ug_cov_reg = [ug_cov uu_p_cov]
gg_cov_reg = [gg_cov ug_cov'; ug_cov uu_p_cov+uki.process.prior_cov / uki.Δt[end]]
tmp = ug_cov_reg / gg_cov_reg
u_mean = u_p_mean + tmp * [obs_mean - g_mean; uki.process.prior_mean - u_p_mean]
uu_cov = uu_p_cov - tmp * ug_cov_reg'
else
tmp = ug_cov / gg_cov
u_mean = u_p_mean + tmp * (obs_mean - g_mean)
uu_cov = uu_p_cov - tmp * ug_cov'
end

########### Save results
push!(uki.process.obs_pred, g_mean) # N_ens x N_data
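
In equations, the `impose_prior` branch above implements the following augmented update (a sketch of the linear algebra only; here ``\hat m`` is `u_p_mean`, ``\hat C^{uu}, \hat C^{ug}, \hat C^{gg}`` are the prediction covariance blocks, ``y`` is `obs_mean`, ``\bar g`` is `g_mean`, ``m_0, \Sigma_0`` are `prior_mean` and `prior_cov`, and ``\Delta t`` is the last step size):

```math
m = \hat m +
\begin{pmatrix} \hat C^{ug} & \hat C^{uu} \end{pmatrix}
\begin{pmatrix} \hat C^{gg} & (\hat C^{ug})^\top \\ \hat C^{ug} & \hat C^{uu} + \Sigma_0 / \Delta t \end{pmatrix}^{-1}
\begin{pmatrix} y - \bar g \\ m_0 - \hat m \end{pmatrix},
\qquad
C = \hat C^{uu} -
\begin{pmatrix} \hat C^{ug} & \hat C^{uu} \end{pmatrix}
\begin{pmatrix} \hat C^{gg} & (\hat C^{ug})^\top \\ \hat C^{ug} & \hat C^{uu} + \Sigma_0 / \Delta t \end{pmatrix}^{-1}
\begin{pmatrix} \hat C^{ug} & \hat C^{uu} \end{pmatrix}^\top .
```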
120 changes: 106 additions & 14 deletions test/EnsembleKalmanProcess/inverse_problem.jl
@@ -25,7 +25,64 @@ function linear_map(rng, n_obs)
end

"""
G_nonlinear(ϕ, rng, noise_dist, ϕ_star = 0.0)
G_nonlinear_old(ϕ, rng, noise_dist, ϕ_star = 0.0)
Noisy nonlinear map output given `ϕ` and multivariate noise.
The map: `(||ϕ-ϕ_star||_1, ||ϕ-ϕ_star||_2, ||ϕ-ϕ_star||_∞)`
Inputs:
- `ϕ` :: Input to the forward map (constrained space)
- `rng` :: Random number generator
- `noise_dist` :: Multivariate noise distribution from which stochasticity of map is sampled.
- `ϕ_star` :: Minimum of the function
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
"""
function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
if ndims(ϕ) == 1
p = length(ϕ)
ϕ_new = reshape(ϕ, (p, 1))
else
ϕ_new = ϕ
end

true_output = zeros(3, size(ϕ_new, 2)) #l^1, l^2, l^inf norm
true_output[1, :] = reshape([norm(c, 1) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)
true_output[2, :] = reshape([norm(c, 2) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)
true_output[3, :] = reshape([maximum(abs.(c)) for c in eachcol(ϕ_new .- ϕ_star)], 1, :)


# Add noise from noise_dist
if isnothing(add_or_mult_noise)
#just copy the observation n_obs//3 times
output = repeat(true_output, length(noise_dist), 1) # (n_obs x n_ens)
else
output = zeros(length(noise_dist) * size(true_output, 1), size(true_output, 2))
internal_noise = rand(rng, noise_dist, size(true_output, 2))

if add_or_mult_noise == "add"
for i in 1:length(noise_dist)
output[(3 * (i - 1) + 1):(3 * i), :] = reshape(internal_noise[i, :], 1, :) .+ true_output
end
elseif add_or_mult_noise == "mult"
for i in 1:length(noise_dist)
output[(3 * (i - 1) + 1):(3 * i), :] = (1 .+ reshape(internal_noise[i, :], 1, :)) .* true_output
end
else
throw(
ArgumentError(
"`add_or_mult_noise` keyword expects either nothing, \"add\" or \"mult\". Got $(add_or_mult_noise).",
),
)
end
end

return size(output, 2) == 1 ? vec(output) : output
end
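
For reference, a hedged usage sketch of the new map in isolation (hypothetical inputs; assumes `G_nonlinear` as defined above is in scope, and `noise_dist` supplies one noise component per repeated block of the three norms):

```julia
using Distributions, LinearAlgebra, Random

rng = Random.MersenneTwister(0)
noise_dist = MvNormal(zeros(2), 0.01 * I)    # 2 noise components -> 6 output entries

ϕ = [0.5, -1.0]                              # a single parameter vector in R^2
y_clean = G_nonlinear(ϕ, rng, noise_dist)    # the three norms, repeated twice
y_noisy = G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 1.0, add_or_mult_noise = "add")
```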


"""
G_nonlinear_old(ϕ, rng, noise_dist, ϕ_star = 0.0)
Noisy nonlinear map output given `ϕ` and multivariate noise.
The map: √(sum(ϕ - ϕ_star)²)
@@ -39,19 +96,20 @@ Inputs:
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
"""
function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
function G_nonlinear_old(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = nothing)
if ndims(ϕ) == 1
p = length(ϕ)
ϕ_new = reshape(ϕ, (p, 1))
else
ϕ_new = ϕ
end
true_output = sqrt.(sum((ϕ_new .- ϕ_star) .^ 2, dims = 1)) # (1 x n_ens)

# Add noise from noise_dist

if isnothing(add_or_mult_noise)
#just copy the observation n_obs times
output = ones(length(noise_dist), 1) * true_output # (n_obs x n_ens)
output = repeat(true_output, length(noise_dist), 1) # (n_obs x n_ens)
elseif add_or_mult_noise == "add"
output = rand(rng, noise_dist, size(true_output, 2)) .+ true_output # (n_obs x n_ens)
elseif add_or_mult_noise == "mult"
@@ -67,14 +125,14 @@ function G_nonlinear(ϕ, rng, noise_dist; ϕ_star = 0.0, add_or_mult_noise = not
end

"""
linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, return_matrix = false)
linear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, return_matrix = false)
Returns the objects defining a linear inverse problem.
Inputs:
- `ϕ_star` :: True parameter value (constrained space)
- `noise_level` :: Magnitude of the observational noise
- `noise_sd` :: Magnitude of the observational noise
- `n_obs` :: Dimension of the observational vector
- `rng` :: Random number generator
- `obs_corrmat` :: Correlation structure of the observational noise
@@ -87,12 +145,12 @@ Returns:
- `Γ` :: Observational covariance matrix
- `A` :: Forward model operator from constrained space
"""
function linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, return_matrix = false)
function linear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, return_matrix = false)
A = linear_map(rng, n_obs)
# Define forward map from unconstrained space directly
G(x) = G_linear(x, A)
y_star = G(ϕ_star)
Γ = noise_level^2 * obs_corrmat
Γ = noise_sd^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
y_obs = y_star .+ rand(rng, noise)
if return_matrix
@@ -103,33 +161,67 @@ function linear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, r
end

"""
nonlinear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I; add_or_mult_noise = "add")
nonlinear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I; add_or_mult_noise = "add")
Returns the objects defining a random nonlinear inverse problem
Inputs:
- `ϕ_star` :: True parameter value (constrained space)
- `noise_level` :: Magnitude of the observational noise
- `noise_sd` :: Magnitude of the observational noise
- `n_obs` :: Dimension of the observational vector
- `rng` :: Random number generator
- `obs_corrmat = I ` :: Correlation structure of the observational noise
- `add_or_mult_noise = nothing` :: additive ["add"] or multiplicative ["mult"] variability optional
Returns:
- `y_obs` :: Vector of observations
- `G` :: Map from constrained space to data space
- `Γ` :: Observational covariance matrix
"""
function nonlinear_inv_problem(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_sd^2 * obs_corrmat
# G produces a 3D output so must div by 3
@assert n_obs % 3 == 0

function nonlinear_inv_problem(ϕ_star, noise_level, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_level^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
internal_noise = MvNormal(zeros(Int(n_obs // 3)), noise_sd^2 * I)
# Define forward map from unconstrained space directly
G(x) = G_nonlinear(x, rng, noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
G(x) = G_nonlinear(x, rng, internal_noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
# Get observation
y_star = G(ϕ_star)
y_obs = y_star .+ rand(rng, noise)
return y_obs, G, Γ
end
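
A hedged sketch of calling this constructor (hypothetical values; assumes the definitions above are in scope, and `n_obs` must be a multiple of 3 because the map returns three norms per noise component):

```julia
using Distributions, LinearAlgebra, Random

rng = Random.MersenneTwister(1)
ϕ_star = [1.0, -0.8]      # hypothetical true parameter (constrained space)
n_obs = 6                 # must satisfy n_obs % 3 == 0
noise_sd = 0.1

y_obs, G, Γ = nonlinear_inv_problem(ϕ_star, noise_sd, n_obs, rng; add_or_mult_noise = "add")
```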

function nonlinear_inv_problem_old(ϕ_star, noise_sd, n_obs, rng; obs_corrmat = I, add_or_mult_noise = nothing)
Γ = noise_sd^2 * obs_corrmat
noise = MvNormal(zeros(n_obs), Γ)
# Define forward map from unconstrained space directly
G(x) = G_nonlinear_old(x, rng, noise, ϕ_star = ϕ_star, add_or_mult_noise = add_or_mult_noise)
# Get observation
y_star = G(ϕ_star)
y_obs = y_star .+ rand(rng, noise)
return y_obs, G, Γ
end


function plot_inv_problem_ensemble(prior, ekp, filepath)
gr()
ϕ_prior = transform_unconstrained_to_constrained(prior, get_u_prior(ekp))
ϕ_final = get_ϕ_final(prior, ekp)
p = plot(ϕ_prior[1, :], ϕ_prior[2, :], seriestype = :scatter, label = "Initial ensemble")
plot!(ϕ_final[1, :], ϕ_final[2, :], seriestype = :scatter, label = "Final ensemble")

plot!(
[ϕ_star[1]],
xaxis = "cons_p",
yaxis = "uncons_p",
seriestype = "vline",
linestyle = :dash,
linecolor = :red,
label = :none,
)
plot!([ϕ_star[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = :none)
savefig(p, filepath)
end