
Commit

Merge pull request #6 from MetOffice/meto/dcarneir/calc_REs_unresolved_scales

✨ Meto/dcarneir/calc_REs_unresolved_scales
mo-dcarneir committed Mar 3, 2022
2 parents 51feda3 + 6e84854 commit 91fa5c8
Showing 61 changed files with 1,357 additions and 929 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
**/__pycache__
184 changes: 184 additions & 0 deletions HL_error_covs/FunctionFitting.py
@@ -0,0 +1,184 @@
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of ocean_error_covs and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
################### Python packages ###########################
import os
# Limit the numerical libraries' thread pools (MKL, OpenBLAS, numexpr, OpenMP)
# to a single thread so that numpy does not grab all processors by default and
# compete with the parallel worker processes.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

from multiprocessing import Pool
################## Code modules ##############################
from modules.io_data import IO
from modules.plot import Plots
from modules.masks import applyMask
from modules.posproc import Posproc
from modules.StatisticTests import StatsTests

# Initialising the classes
IO = IO()
Posproc = Posproc()
Plots = Plots()
applyMask = applyMask()
StatsTests = StatsTests()


def HL_fitting_function(infile, outfilename, func_name="MultiGauss",
num_funcs=2, lenscale=(400,40), plot=None, outfig='./figures',
nproc=4, min_num_obs=2, max_iter=100, scalefac=1.0, f_test=True):

""" Top-level routine that fits a specific function to HL stats covariance file
***************** PARAMETERS *******************
1. infile: name of file containing the HL error covariances
2. outfilename: name of file to write the results to
3. func_name: name of function to fit to (options: MultiGauss and MultiGauss_Fixed)
4. num_funcs: number of functions to use (default: 2)
5. lenscale: Tuple of pre-defined lengthscales used in MultiGauss_Fixed
or in MultiGauss as initial guesses for the lengthscales.
Number of tuple members must be equal to number of functions.
6. plot: positions of (x,y) pairs to plot or None (default: None)
7. outfig: path to save the figs
8. nproc: number of processors to use (default: 4)
9. min_num_obs: minimum number of observations to do calculations
10. max_iter: max number of iterations
11. scalefac: factor to scale the variances when they are very small
12. f_test perform an f_test on the result and write out the p value
"""

# Checking consistency of input parameters
if(func_name != "MultiGauss" and func_name != "MultiGauss_Fixed"):
raise ValueError("[ERROR] FUNCTION NOT AVAILABLE")

if(len(lenscale) != num_funcs):
raise ValueError("[ERROR] NUMBER OF LENGTHSCALES NOT COMPATIBLE " +
"WITH NUMBER OF FUNCTIONS")

if(not os.path.exists(infile)):
raise ValueError("[ERROR] INPUT FILE NOT FOUND")

# Read dimension variables from netcdf file
ncdata = IO.ncread_variables(infile, ['latitude', 'longitude', 'bins', 'depth'])
lats = ncdata[0]
lons = ncdata[1]
bins = ncdata[2]
depth = ncdata[3]

# Create netcdf object and add dimensions
outfile = IO.nc_define_dimensions(outfilename,
['latitude', 'longitude' , 'depth'],
[len(lats), len(lons), len(depth)])

# Write dimension variables
IO.ncwrite_variables(outfile, ['latitude'], ['f'], ('latitude'), vardata=[lats])
IO.ncwrite_variables(outfile, ['longitude'], ['f'], ('longitude'), vardata=[lons])
IO.ncwrite_variables(outfile, ['depth'], ['f'], ('depth'), vardata=[depth])

# Add attributes
outfile.Function = "Function fitting done using the "+func_name+" function"

# Add variables
IO.ncwrite_variables(outfile, ['RSS', 'RSS_vs_mean', 'degrees_of_freedom', 'obs_err'],
['f', 'f', 'i', 'f'], ('depth', 'latitude', 'longitude'))
if f_test:
IO.ncwrite_variables(outfile, ['P_val'], ['f'], ('depth', 'latitude', 'longitude'))

# Calculate x positions based on the separation distances
x_val = Posproc.calc_x_positions(bins)

for lev in range(0, len(depth)):
print(f"MESSAGE: Fitting function {func_name} to ErrorCov data: {depth[lev]} m")

# set up workers
workers = Pool(nproc)

# Reading error covariance variables
ncdata = IO.ncread_variables(infile, ['GridNumObs', 'GridVariance', \
'Correlation', 'Covariance'], dep_lev=lev)
numobsvar = ncdata[0]
var = ncdata[1]
cors = ncdata[2]
covs = ncdata[3]

        # Account for precision errors by clipping the
        # correlations to the range [-1.0, 1.0]
cors[cors>1.] = 1.
cors[cors<-1.] = -1.

# Creating list with arguments to run in parallel
arg_lists = Posproc.create_arg_list(x_val, cors, var, numobsvar, min_num_obs,
func_name, num_funcs, lenscale, max_iter,
scalefac)

# Get workers to do parallel calculations
results = workers.map(Posproc.fitter, arg_lists)
workers.close()

# Unravel results into output grids
params, obs_err, rss_func_grid, rss_mean_grid, dof = \
Posproc.results_to_grid(results, len(lats), len(lons))

        # If requested, perform an F-test comparing the fit to the mean
p_val = None
if f_test:
if (func_name == "MultiGauss"):
num_params = 2 * num_funcs
elif (func_name == "MultiGauss_Fixed"):
num_params = num_funcs
else:
raise ValueError(f"[ERROR] Cannot calculate num_params for function={func_name}")
p_val = StatsTests.f_test_pvalue(rss_func_grid, rss_mean_grid,
num_params, num_params + dof)

# Remove scale factor if needed
obs_err = obs_err/scalefac
for param in range(0, len(params)):
if "Magnitude" in arg_lists[0]["func"].param_names()[param]:
params[param] = params[param]/scalefac

# Plot some results if requested
        if plot is not None:
print(f"MESSAGE: Plotting results - data versus fitting: {depth[lev]} m")
Plots.plot_data_vs_fitting(outfig, plot, x_val, cors, var, obs_err, lats, lons,
depth[lev], params, func_name, num_funcs, lenscale, p_val)

for param in range(0, len(params)):
if lev == 0:
# Define netcdf variables from fitting function results
IO.ncwrite_variables(outfile, [arg_lists[0]["func"].param_names()[param]],
['f'], ('depth', 'latitude', 'longitude'))

# Masking function fitting outputs
params[param].mask = applyMask.create_mask(params[param].mask, [rss_func_grid],
[-1e10], ['=='], var_look_nan=rss_func_grid)

# Adding function fitting parameter to netcdf
IO.ncwrite_variables(outfile, [arg_lists[0]["func"].param_names()[param]],
[], [], vardata=[params[param]], create_vars=False,
dep_lev=lev)

# Masking RSS and obs_err
rss_func_grid.mask = applyMask.create_mask(rss_func_grid.mask, [rss_func_grid], [-1e10],
['=='], var_look_nan=rss_func_grid)
rss_mean_grid.mask = applyMask.create_mask(rss_mean_grid.mask, [rss_func_grid], [-1e10],
['=='], var_look_nan=rss_func_grid)
obs_err.mask = applyMask.create_mask(obs_err.mask, [rss_func_grid], [-1e10],
['=='], var_look_nan=rss_func_grid)

        # Add obs_err, RSS statistics and degrees of freedom to netcdf
IO.ncwrite_variables(outfile, ['obs_err', 'RSS', 'RSS_vs_mean', 'degrees_of_freedom'],
[], [], vardata=[obs_err, rss_func_grid, rss_mean_grid, dof],
create_vars=False, dep_lev=lev)

# Add P-val to netcdf
if f_test:
p_val.mask = applyMask.create_mask(p_val.mask, [rss_func_grid], [-1e10],
['=='], var_look_nan=rss_func_grid)
IO.ncwrite_variables(outfile, ['P_val'], [], [], vardata=[p_val],
create_vars=False, dep_lev=lev)

outfile.close()
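
For orientation, here is a minimal usage sketch of the new fitting entry point. The input and output file names are hypothetical, it assumes the package is run from the repository root so that HL_error_covs and modules are importable, and the keyword values simply mirror the defaults visible in the signature above.

from HL_error_covs.FunctionFitting import HL_fitting_function

# Fit a two-component Gaussian to a previously computed HL error-covariance
# file. "hl_covs.nc" and "fitted_params.nc" are placeholder file names; the
# initial lengthscale guesses (400, 40) follow the default tuple in the
# signature above.
HL_fitting_function("hl_covs.nc", "fitted_params.nc",
                    func_name="MultiGauss", num_funcs=2,
                    lenscale=(400, 40), nproc=4,
                    plot=None, f_test=True)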
129 changes: 101 additions & 28 deletions HL_error_covs/master.py → HL_error_covs/HL.py
@@ -14,11 +14,11 @@
import numpy as np
from multiprocessing import Pool
################## Code modules ##############################
import HL_error_covs.arrays as arrays
from HL_error_covs.io_data import IO
from HL_error_covs.utils import Utils
import modules.arrays as arrays
from modules.io_data import IO
from modules.utils import Utils
from HL_error_covs.errorCovs import HLerrorCovs
from HL_error_covs.masks import applyMask
from modules.masks import applyMask

# Initialising the classes
IO = IO()
@@ -78,17 +78,27 @@ def HL_cov_accum_stats(list_of_fdbackfiles, obs_type="SST",
list_per_proc = Utils.divide_files_per_proc(nproc, list_of_fdbackfiles)

# Create netcdf object and add dimensions
outfile = IO.nc_define_dimensions(outfilename, nlat, nlon, nbin, ndep)
outfile = IO.nc_define_dimensions(outfilename,
['latitude', 'longitude', 'bins', 'depth'],
[nlat, nlon, nbin, ndep])

# Add netcdf variables (accumulated stats)
IO.nc_define_accum_stats_variables(outfile)
# Write dimension variables
IO.ncwrite_variables(outfile, ['latitude'], ['f'], ('latitude'), [grid_lat])
IO.ncwrite_variables(outfile, ['longitude'], ['f'], ('longitude'), [grid_lon])
IO.ncwrite_variables(outfile, ['depth'], ['f'], ('depth'), [depths])
IO.ncwrite_variables(outfile, ['bins'], ['f'], ('bins'), [bins])

# Write dimension variables (depth, lat, lon and bins)
IO.ncwrite_dimension_variables(outfile, grid_lat, grid_lon, depths, bins)
# Create 4D netcdf variables (accumulated stats)
IO.ncwrite_variables(outfile, ['SumX', 'SumY', 'SumXSq', 'SumYSq', 'SumXY', 'NumObsCov'],
['f', 'f', 'f', 'f', 'f', 'i'], ('depth', 'latitude', 'longitude', 'bins'))

# Create 3D netcdf variables (accumulated stats)
IO.ncwrite_variables(outfile, ['GridSum', 'GridSumSq', 'GridSumObsStd', 'GridNumObs'],
['f', 'f', 'f', 'i'], ('depth', 'latitude', 'longitude'))

print("MESSAGE: {} nprocs to process {} feedback files".format(nproc,
len(list_of_fdbackfiles)))
arg_list=[]

for dep_lev in range(0, ndep):
print("MESSAGE: Calculating accumulated stats for level: ", dep_lev)

@@ -122,8 +132,17 @@ def HL_cov_accum_stats(list_of_fdbackfiles, obs_type="SST",
sum_stats += p[0]
grid_stats += p[1]

# Write accumulated stats to output file
IO.ncwrite_accum_stats(outfile, dep_lev, sum_stats, grid_stats)
# Write 4D accumulated stats to output file
IO.ncwrite_variables(outfile, ['SumX', 'SumY', 'SumXSq', 'SumYSq', 'SumXY', 'NumObsCov'], [], [], \
vardata=[sum_stats.sum_x, sum_stats.sum_y, sum_stats.sum_x_sq, \
sum_stats.sum_y_sq, sum_stats.sum_xy, sum_stats.num_pairs_in_cov], \
create_vars=False, dep_lev=dep_lev)

# Write 3D accumulated stats to output file
IO.ncwrite_variables(outfile, ['GridSum', 'GridSumSq', 'GridNumObs', 'GridSumObsStd'], [], [], \
vardata=[grid_stats.grid_sum, grid_stats.grid_sum_sq, \
grid_stats.num_obs_in_grid, grid_stats.grid_sum_obs_std], \
create_vars=False, dep_lev=dep_lev)

outfile.close()

@@ -143,43 +162,97 @@ def HL_error_covs(list_of_files, outfilename="corrs.nc"):
if list_of_files is None:
raise ValueError("[ERROR] NETCDF FILES NOT FOUND")

grid_lat, grid_lon, depths, bins = IO.ncread_dimension_variables(list_of_files[0])
nbin = len(bins)
ndep = len(depths)
ncdata = IO.ncread_variables(list_of_files[0], ['latitude', 'longitude', 'bins', 'depth'])
grid_lat = ncdata[0]
grid_lon = ncdata[1]
bins = ncdata[2]
depths = ncdata[3]
nlat = len(grid_lat)
nlon = len(grid_lon)
nbin = len(bins)
ndep = len(depths)

# Create netcdf object and add dimensions
outfile = IO.nc_define_dimensions(outfilename, nlat, nlon, nbin, ndep)
outfile = IO.nc_define_dimensions(outfilename,
['latitude', 'longitude', 'bins', 'depth'],
[nlat, nlon, nbin, ndep])

# Add netcdf variables (final ErrorCovs stats)
IO.nc_define_cov_variables(outfile)
# Write dimension variables
IO.ncwrite_variables(outfile, ['latitude'], ['f'], ('latitude'), [grid_lat])
IO.ncwrite_variables(outfile, ['longitude'], ['f'], ('longitude'), [grid_lon])
IO.ncwrite_variables(outfile, ['depth'], ['f'], ('depth'), [depths])
IO.ncwrite_variables(outfile, ['bins'], ['f'], ('bins'), [bins])

# Write dimension variables (depth, lat, lon and bins)
IO.ncwrite_dimension_variables(outfile, grid_lat, grid_lon,
depths, bins)
# Add netcdf 4D variables (final ErrorCovs stats)
IO.ncwrite_variables(outfile, ['NumObsCov', 'Covariance', 'Correlation'],
['i', 'f', 'f'], ('depth', 'latitude', 'longitude', 'bins'),
fill_value=[-99999, 1e10, 1e10])

# Add netcdf 3D variables (final ErrorCovs stats)
IO.ncwrite_variables(outfile, ['GridVariance', 'GridNumObs', \
'GridMeanBinnedError', 'GridMeanObsStd'], ['f', 'i', 'f', 'f'],
('depth', 'latitude', 'longitude'),
fill_value=[1e10, -99999, 1e10, 1e10])

for dep_lev in range(0, ndep):
print("MESSAGE: Calculating error covariance for level: " + str(depths[dep_lev]) + " m")
final_cov_stats = arrays.CovSumStats((nlat, nlon, nbin))
final_grid_stats = arrays.GridSumStats((nlat, nlon))
for f in list_of_files:
print("MESSAGE: Reading file {}".format(f))
cov_stats, grid_stats = IO.ncread_accum_stats(f, nlat, nlon, nbin, dep_lev)

# Read cov_stats variables from netcdf
ncdata = IO.ncread_variables(f, ['SumX', 'SumY', 'SumXSq', 'SumYSq', 'SumXY', \
'NumObsCov'], dep_lev=dep_lev)

cov_stats = arrays.CovSumStats((nlat, nlon, nbin))
cov_stats.sum_x = ncdata[0]
cov_stats.sum_y = ncdata[1]
cov_stats.sum_x_sq = ncdata[2]
cov_stats.sum_y_sq = ncdata[3]
cov_stats.sum_xy = ncdata[4]
cov_stats.num_pairs_in_cov = ncdata[5]

# Read grid_stats variables from netcdf
ncdata = IO.ncread_variables(f, ['GridSum', 'GridSumSq', 'GridNumObs', \
'GridSumObsStd'], dep_lev=dep_lev)

grid_stats = arrays.GridSumStats((nlat, nlon))
grid_stats.grid_sum = ncdata[0]
grid_stats.grid_sum_sq = ncdata[1]
grid_stats.num_obs_in_grid = ncdata[2]
grid_stats.grid_sum_obs_std = ncdata[3]

final_cov_stats += cov_stats
final_grid_stats += grid_stats

# Calculate correlation and covariancee
# Calculate correlation and covariance
cov_xy, corr_xy, grid_mean, grid_var, numobsgrid, \
numpairscov, grid_mean_obstd = HLerrorCovs.calc_err_covs(final_cov_stats,
final_grid_stats, nbin, nlat, nlon)

# Mask output data
applyMask.mask_output_cov_data(grid_mean, grid_var, grid_mean_obstd,
numobsgrid, numpairscov, cov_xy, corr_xy)
grid_mean.mask = applyMask.create_mask(grid_mean.mask, [grid_mean, grid_mean, \
numobsgrid], [1e10, -1e10, 0], ['>', '<', '=='],
var_look_nan=grid_mean)
grid_mean_obstd.mask = applyMask.create_mask(grid_mean_obstd.mask, [grid_mean_obstd, \
grid_mean_obstd, numobsgrid], [1e10, -1e10, 0],
['>', '<', '=='], var_look_nan=grid_mean_obstd)
grid_var.mask = applyMask.create_mask(grid_var.mask, [grid_var, grid_var, numobsgrid],
[1e10, -1e10, 0], ['>', '<', '=='], var_look_nan=grid_var)
cov_xy.mask = applyMask.create_mask(cov_xy.mask, [cov_xy, cov_xy, numpairscov],
[1e10, -1e10, 0], ['>', '<', '=='], var_look_nan=cov_xy)
corr_xy.mask = applyMask.create_mask(corr_xy.mask, [corr_xy, corr_xy, numpairscov],
[1e10, -1e10, 0], ['>', '<', '=='], var_look_nan=corr_xy)

# Write 4D variables to output file
IO.ncwrite_variables(outfile, ['NumObsCov', 'Covariance', 'Correlation'], [], [], \
vardata=[numpairscov, cov_xy, corr_xy], create_vars=False,
dep_lev=dep_lev)

# Write error covariances to output file
IO.ncwrite_covariance(outfile, dep_lev, grid_mean, grid_var, grid_mean_obstd,
numobsgrid, numpairscov, cov_xy, corr_xy)
# Write 3D variables to output file
IO.ncwrite_variables(outfile, ['GridVariance', 'GridNumObs', 'GridMeanBinnedError', \
'GridMeanObsStd'], [], [], vardata=[grid_var, numobsgrid, \
grid_mean, grid_mean_obstd], create_vars=False, dep_lev=dep_lev)

outfile.close()
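
To show how the renamed driver is intended to be called, a hedged end-to-end sketch follows. The feedback and output file names are placeholders, and only the arguments visible in this diff are used; any further keyword arguments of HL_cov_accum_stats are truncated in the hunk above and therefore omitted here.

from HL_error_covs.HL import HL_cov_accum_stats, HL_error_covs

# Stage 1: accumulate per-grid-box summation statistics from a list of
# feedback files (placeholder names). Additional keyword arguments, including
# the name of the accumulated-statistics output file, are not shown in this
# diff and are left at their defaults here.
HL_cov_accum_stats(["fdbk_day1.nc", "fdbk_day2.nc"], obs_type="SST")

# Stage 2: combine one or more accumulated-statistics files (assumed here to
# be called accum_stats.nc) into error covariances and correlations.
HL_error_covs(["accum_stats.nc"], outfilename="corrs.nc")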
