From 1104449089c823e5b2e6e52f9e297c504935ee08 Mon Sep 17 00:00:00 2001 From: "Gregory L. Wagner" Date: Sun, 24 Mar 2024 04:15:55 -0700 Subject: [PATCH] Generalize file splitting for output writers (#3515) * Generalize file splitting for JLD2OutputWriter so that alternative criteria to file size may be used * Update src/OutputWriters/jld2_output_writer.jl Co-authored-by: Navid C. Constantinou * Export FileSizeLimit and update JLD2OutputWriter test * implementing file splitting in netcdf * Properly export FileSizeLimit * fix handling of path writer.filepath with the file_splitting * add support to file splitting by size in netCDFs * add support to file splitting by size in netCDFs * update warning to properly print variable. Co-authored-by: Gregory L. Wagner * return to the use of FileSizeLimit(200KiB) in the test to make it easier to read * update netcdf to match jld2, and add return in update_file_splitting_schedule * fix filesize tests * Apply suggestions from code review * Update test_jld2_output_writer.jl * fix show for NetCDFOutputWriter * fix doctests * Update src/OutputWriters/netcdf_output_writer.jl Co-authored-by: Gregory L. Wagner * Update src/OutputWriters/jld2_output_writer.jl Co-authored-by: Gregory L. Wagner * fix doctests * fix doctests * fix doctest * cleanup unnecessary imports * fix doctest * fix doctests * fix doctests --------- Co-authored-by: Navid C. 
Constantinou Co-authored-by: josuemtzmo --- benchmark/benchmark_multi_GPU.jl | 1 - docs/src/model_setup/output_writers.md | 20 ++++-- src/Oceananigans.jl | 2 +- src/OutputWriters/OutputWriters.jl | 3 +- src/OutputWriters/jld2_output_writer.jl | 67 ++++++++++--------- src/OutputWriters/netcdf_output_writer.jl | 55 ++++++++++----- src/OutputWriters/output_writer_utils.jl | 41 ++++++++++++ src/OutputWriters/windowed_time_average.jl | 6 +- test/test_jld2_output_writer.jl | 4 +- test/test_netcdf_output_writer.jl | 10 ++- validation/barotropic_gyre/barotropic_gyre.jl | 8 +-- .../solid_body_rotation.jl | 1 - .../solid_body_tracer_advection.jl | 1 - 13 files changed, 143 insertions(+), 76 deletions(-) diff --git a/benchmark/benchmark_multi_GPU.jl b/benchmark/benchmark_multi_GPU.jl index 0abc8b21ca..39886efa3c 100644 --- a/benchmark/benchmark_multi_GPU.jl +++ b/benchmark/benchmark_multi_GPU.jl @@ -12,7 +12,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: ExplicitFreeSurface using Oceananigans.Utils: prettytime, hours -using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval using Oceananigans.MultiRegion using Oceananigans.TurbulenceClosures: VerticallyImplicitTimeDiscretization diff --git a/docs/src/model_setup/output_writers.md b/docs/src/model_setup/output_writers.md index a9e6d72536..20ce934d17 100644 --- a/docs/src/model_setup/output_writers.md +++ b/docs/src/model_setup/output_writers.md @@ -70,6 +70,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0) ├── 2 outputs: (c, u) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 14.8 KiB ``` ```jldoctest netcdf1 @@ -83,6 +85,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(1), zF(1), xC(16), yF(16), xF(16), yC(16), time(0) ├── 2 outputs: (c, u) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 14.8 KiB ``` 
```jldoctest netcdf1 @@ -98,6 +102,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(16), zF(17), xC(1), yF(1), xF(1), yC(1), time(0) ├── 2 outputs: (c, u) averaged on AveragedTimeInterval(window=20 seconds, stride=1, interval=1 minute) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 17.6 KiB ``` `NetCDFOutputWriter` also accepts output functions that write scalars and arrays to disk, @@ -148,6 +154,8 @@ NetCDFOutputWriter scheduled on IterationInterval(1): ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0) ├── 3 outputs: (profile, slice, scalar) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 17.8 KiB ``` See [`NetCDFOutputWriter`](@ref) for more information. @@ -196,7 +204,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes): ├── 3 outputs: (u, v, w) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 27.4 KiB ``` and a time- and horizontal-average of tracer `c` every 20 minutes of simulation time @@ -213,7 +222,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes): ├── 1 outputs: c averaged on AveragedTimeInterval(window=5 minutes, stride=1, interval=20 minutes) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 17.5 KiB ``` @@ -239,7 +249,7 @@ time `interval`. 
The ``t_i`` specify both the end of the averaging window and th Building an `AveragedTimeInterval` that averages over a 1 day window, every 4 days, ```jldoctest averaged_time_interval -using Oceananigans.OutputWriters: AveragedTimeInterval +using Oceananigans using Oceananigans.Units schedule = AveragedTimeInterval(4days, window=1day) @@ -253,7 +263,6 @@ to time-average its outputs before writing them to disk: ```jldoctest averaged_time_interval using Oceananigans -using Oceananigans.OutputWriters: JLD2OutputWriter using Oceananigans.Units model = NonhydrostaticModel(grid=RectilinearGrid(size=(1, 1, 1), extent=(1, 1, 1))) @@ -270,5 +279,6 @@ JLD2OutputWriter scheduled on TimeInterval(4 days): ├── 3 outputs: (u, v, w) averaged on AveragedTimeInterval(window=1 day, stride=2, interval=4 days) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 26.7 KiB ``` diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index 6e74ae3180..743f53a512 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -103,7 +103,7 @@ export # Output writers NetCDFOutputWriter, JLD2OutputWriter, Checkpointer, TimeInterval, IterationInterval, AveragedTimeInterval, SpecifiedTimes, - AndSchedule, OrSchedule, written_names, + FileSizeLimit, AndSchedule, OrSchedule, written_names, # Output readers FieldTimeSeries, FieldDataset, InMemory, OnDisk, diff --git a/src/OutputWriters/OutputWriters.jl b/src/OutputWriters/OutputWriters.jl index 1d5de0f6b5..0f66cee682 100644 --- a/src/OutputWriters/OutputWriters.jl +++ b/src/OutputWriters/OutputWriters.jl @@ -2,7 +2,7 @@ module OutputWriters export JLD2OutputWriter, NetCDFOutputWriter, written_names, - Checkpointer, WindowedTimeAverage, + Checkpointer, WindowedTimeAverage, FileSizeLimit, TimeInterval, IterationInterval, WallTimeInterval, AveragedTimeInterval using CUDA @@ -15,6 +15,7 @@ using Oceananigans.Models using Oceananigans: 
AbstractOutputWriter using Oceananigans.Grids: interior_indices using Oceananigans.Utils: TimeInterval, IterationInterval, WallTimeInterval, instantiate +using Oceananigans.Utils: pretty_filesize using OffsetArrays diff --git a/src/OutputWriters/jld2_output_writer.jl b/src/OutputWriters/jld2_output_writer.jl index 37eeaee203..1298a1c584 100644 --- a/src/OutputWriters/jld2_output_writer.jl +++ b/src/OutputWriters/jld2_output_writer.jl @@ -2,14 +2,14 @@ using Printf using JLD2 using Oceananigans.Utils using Oceananigans.Models -using Oceananigans.Utils: TimeInterval, pretty_filesize, prettykeys +using Oceananigans.Utils: TimeInterval, prettykeys using Oceananigans.Fields: boundary_conditions, indices default_included_properties(::NonhydrostaticModel) = [:grid, :coriolis, :buoyancy, :closure] default_included_properties(::ShallowWaterModel) = [:grid, :coriolis, :closure] default_included_properties(::HydrostaticFreeSurfaceModel) = [:grid, :coriolis, :buoyancy, :closure] -mutable struct JLD2OutputWriter{O, T, D, IF, IN, KW} <: AbstractOutputWriter +mutable struct JLD2OutputWriter{O, T, D, IF, IN, FS, KW} <: AbstractOutputWriter filepath :: String outputs :: O schedule :: T @@ -17,7 +17,7 @@ mutable struct JLD2OutputWriter{O, T, D, IF, IN, KW} <: AbstractOutputWriter init :: IF including :: IN part :: Int - max_filesize :: Float64 + file_splitting :: FS overwrite_existing :: Bool verbose :: Bool jld2_kw :: KW @@ -32,7 +32,7 @@ ext(::Type{JLD2OutputWriter}) = ".jld2" indices = (:, :, :), with_halos = false, array_type = Array{Float64}, - max_filesize = Inf, + file_splitting = NoFileSplitting(), overwrite_existing = false, init = noinit, including = [:grid, :coriolis, :buoyancy, :closure], @@ -54,7 +54,7 @@ Keyword arguments ## Filenaming - `filename` (required): Descriptive filename. `".jld2"` is appended to `filename` in the file path - if `filename` does not end in `".jld2"`. + if `filename` does not end in `".jld2"`. - `dir`: Directory to save output to. 
Default: `"."` (current working directory). @@ -80,10 +80,12 @@ Keyword arguments ## File management -- `max_filesize`: The writer will stop writing to the output file once the file size exceeds `max_filesize`, - and write to a new one with a consistent naming scheme ending in `part1`, `part2`, etc. - Defaults to `Inf`. - +- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with + `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will + split the output file when its size exceeds `sz`. Another example is + `file_splitting = TimeInterval(30days)`, which will split files every 30 days of + simulation time. The default incurs no splitting (`NoFileSplitting()`). + - `overwrite_existing`: Remove existing files if their filenames conflict. Default: `false`. @@ -100,7 +102,7 @@ Keyword arguments - `verbose`: Log what the output writer is doing with statistics on compute/write times and file sizes. Default: `false`. -- `part`: The starting part number used if `max_filesize` is finite. +- `part`: The starting part number used when file splitting. Default: 1. - `jld2_kw`: Dict of kwargs to be passed to `jldopen` when data is written. 
@@ -138,7 +140,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes): ├── 3 outputs: (u, v, w) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 27.4 KiB ``` and a time- and horizontal-average of tracer ``c`` every 20 minutes of simulation time @@ -155,7 +158,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes): ├── 1 outputs: c averaged on AveragedTimeInterval(window=5 minutes, stride=1, interval=20 minutes) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 17.5 KiB ``` """ function JLD2OutputWriter(model, outputs; filename, schedule, @@ -163,7 +167,7 @@ function JLD2OutputWriter(model, outputs; filename, schedule, indices = (:, :, :), with_halos = false, array_type = Array{Float64}, - max_filesize = Inf, + file_splitting = NoFileSplitting(), overwrite_existing = false, init = noinit, including = default_included_properties(model), @@ -174,8 +178,9 @@ function JLD2OutputWriter(model, outputs; filename, schedule, mkpath(dir) filename = auto_extension(filename, ".jld2") filepath = joinpath(dir, filename) + update_file_splitting_schedule!(file_splitting, filepath) overwrite_existing && isfile(filepath) && rm(filepath, force=true) - + outputs = NamedTuple(Symbol(name) => construct_output(outputs[name], model.grid, indices, with_halos) for name in keys(outputs)) @@ -183,9 +188,9 @@ function JLD2OutputWriter(model, outputs; filename, schedule, schedule, outputs = time_average_outputs(schedule, outputs, model) initialize_jld2_file!(filepath, init, jld2_kw, including, outputs, model) - + return JLD2OutputWriter(filepath, outputs, schedule, array_type, init, - including, part, max_filesize, overwrite_existing, verbose, jld2_kw) + including, part, file_splitting, overwrite_existing, verbose, jld2_kw) end function 
initialize_jld2_file!(filepath, init, jld2_kw, including, outputs, model) @@ -247,18 +252,17 @@ end function write_output!(writer::JLD2OutputWriter, model) verbose = writer.verbose - path = writer.filepath current_iteration = model.clock.iteration # Some logic to handle writing to existing files - if iteration_exists(path, current_iteration) + if iteration_exists(writer.filepath, current_iteration) if writer.overwrite_existing # Something went wrong, so we remove the file and re-initialize it. - rm(path, force=true) + rm(writer.filepath, force=true) initialize_jld2_file!(writer, model) else # nothing we can do since we were asked not to overwrite_existing, so we skip output writing - @warn "Iteration $current_iteration was found in $path. Skipping output writing (for now...)" + @warn "Iteration $current_iteration was found in $(writer.filepath). Skipping output writing (for now...)" end else # ok let's do this @@ -271,16 +275,15 @@ function write_output!(writer::JLD2OutputWriter, model) verbose && @info "Fetching time: $(prettytime(tc))" - # Start a new file if the filesize exceeds max_filesize - filesize(path) >= writer.max_filesize && start_next_file(model, writer) - path = writer.filepath # we might have a new path... - + # Start a new file if the file_splitting(model) is true + writer.file_splitting(model) && start_next_file(model, writer) + update_file_splitting_schedule!(writer.file_splitting, writer.filepath) # Write output from `data` verbose && @info "Writing JLD2 output $(keys(writer.outputs)) to $path..." 
- start_time, old_filesize = time_ns(), filesize(path) - jld2output!(path, model.clock.iteration, model.clock.time, data, writer.jld2_kw) - end_time, new_filesize = time_ns(), filesize(path) + start_time, old_filesize = time_ns(), filesize(writer.filepath) + jld2output!(writer.filepath, model.clock.iteration, model.clock.time, data, writer.jld2_kw) + end_time, new_filesize = time_ns(), filesize(writer.filepath) verbose && @info @sprintf("Writing done: time=%s, size=%s, Δsize=%s", prettytime((end_time - start_time) / 1e9), @@ -311,9 +314,10 @@ end function start_next_file(model, writer::JLD2OutputWriter) verbose = writer.verbose - sz = filesize(writer.filepath) + verbose && @info begin - "Filesize $(pretty_filesize(sz)) has exceeded maximum file size $(pretty_filesize(writer.max_filesize))." + schedule_type = summary(writer.file_splitting) + "Splitting output because $(schedule_type) is activated." end if writer.part == 1 @@ -329,7 +333,7 @@ function start_next_file(model, writer::JLD2OutputWriter) verbose && @info "Now writing to: $(writer.filepath)" initialize_jld2_file!(writer, model) - + return nothing end @@ -346,5 +350,6 @@ function Base.show(io::IO, ow::JLD2OutputWriter) "├── $Noutputs outputs: ", prettykeys(ow.outputs), show_averaging_schedule(averaging_schedule), "\n", "├── array type: ", show_array_type(ow.array_type), "\n", "├── including: ", ow.including, "\n", - "└── max filesize: ", pretty_filesize(ow.max_filesize)) + "├── file_splitting: ", summary(ow.file_splitting), "\n", + "└── file size: ", pretty_filesize(filesize(ow.filepath))) end diff --git a/src/OutputWriters/netcdf_output_writer.jl b/src/OutputWriters/netcdf_output_writer.jl index 0c02e6f627..0fb468a117 100644 --- a/src/OutputWriters/netcdf_output_writer.jl +++ b/src/OutputWriters/netcdf_output_writer.jl @@ -10,7 +10,7 @@ using Oceananigans.Utils: versioninfo_with_gpu, oceananigans_versioninfo, pretty using Oceananigans.TimeSteppers: float_or_date_time using Oceananigans.Fields: 
reduced_dimensions, reduced_location, location, validate_indices -mutable struct NetCDFOutputWriter{D, O, T, A} <: AbstractOutputWriter +mutable struct NetCDFOutputWriter{D, O, T, A, FS} <: AbstractOutputWriter filepath :: String dataset :: D outputs :: O @@ -24,7 +24,7 @@ mutable struct NetCDFOutputWriter{D, O, T, A} <: AbstractOutputWriter overwrite_existing :: Bool deflatelevel :: Int part :: Int - max_filesize :: Float64 + file_splitting :: FS verbose :: Bool end @@ -173,7 +173,7 @@ end overwrite_existing = false, deflatelevel = 0, part = 1, - max_filesize = Inf, + file_splitting = NoFileSplitting(), verbose = false) Construct a `NetCDFOutputWriter` that writes `(label, output)` pairs in `outputs` (which should @@ -222,15 +222,19 @@ Keyword arguments and 9 means maximum compression). See [NCDatasets.jl documentation](https://alexander-barth.github.io/NCDatasets.jl/stable/variables/#Creating-a-variable) for more information. -- `part`: The starting part number used if `max_filesize` is finite. - Default: 1. - -- `max_filesize`: The writer will stop writing to the output file once the file size exceeds `max_filesize`, - and write to a new one with a consistent naming scheme ending in `part1`, `part2`, etc. - Defaults to `Inf`. +- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with + `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will + split the output file when its size exceeds `sz`. Another example is + `file_splitting = TimeInterval(30days)`, which will split files every 30 days of + simulation time. The default incurs no splitting (`NoFileSplitting()`). ## Miscellaneous keywords +- `verbose`: Log what the output writer is doing with statistics on compute/write times and file sizes. + Default: `false`. + +- `part`: The starting part number used when file splitting. + - `global_attributes`: Dict of model properties to save with every file. Default: `Dict()`. 
- `output_attributes`: Dict of attributes to be saved with each field variable (reasonable @@ -263,6 +267,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0) ├── 2 outputs: (c, u) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 14.8 KiB ``` ```jldoctest netcdf1 @@ -276,6 +282,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(1), zF(1), xC(16), yF(16), xF(16), yC(16), time(0) ├── 2 outputs: (c, u) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 14.8 KiB ``` ```jldoctest netcdf1 @@ -291,6 +299,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute): ├── dimensions: zC(16), zF(17), xC(1), yF(1), xF(1), yC(1), time(0) ├── 2 outputs: (c, u) averaged on AveragedTimeInterval(window=20 seconds, stride=1, interval=1 minute) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 17.6 KiB ``` `NetCDFOutputWriter` also accepts output functions that write scalars and arrays to disk, @@ -341,6 +351,8 @@ NetCDFOutputWriter scheduled on IterationInterval(1): ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0) ├── 3 outputs: (profile, slice, scalar) └── array type: Array{Float64} +├── file_splitting: NoFileSplitting +└── file size: 17.8 KiB ``` """ function NetCDFOutputWriter(model, outputs; filename, schedule, @@ -354,12 +366,14 @@ function NetCDFOutputWriter(model, outputs; filename, schedule, overwrite_existing = nothing, deflatelevel = 0, part = 1, - max_filesize = Inf, + file_splitting = NoFileSplitting(), verbose = false) mkpath(dir) filename = auto_extension(filename, ".nc") filepath = joinpath(dir, filename) + update_file_splitting_schedule!(file_splitting, filepath) + if isnothing(overwrite_existing) if isfile(filepath) overwrite_existing = false @@ -415,7 +429,7 @@ function NetCDFOutputWriter(model, outputs; filename, schedule, 
overwrite_existing, deflatelevel, part, - max_filesize, + file_splitting, verbose) end @@ -485,9 +499,9 @@ Write output to netcdf file `output_writer.filepath` at specified intervals. Inc every time an output is written to the file. """ function write_output!(ow::NetCDFOutputWriter, model) - # TODO allow user to split by number of snapshots, rathern than filesize. - # Start a new file if the filesize exceeds max_filesize - filesize(ow.filepath) ≥ ow.max_filesize && start_next_file(model, ow) + # Start a new file if the file_splitting(model) is true + ow.file_splitting(model) && start_next_file(model, ow) + update_file_splitting_schedule!(ow.file_splitting, ow.filepath) ow.dataset = open(ow) @@ -556,7 +570,9 @@ function Base.show(io::IO, ow::NetCDFOutputWriter) "├── filepath: ", ow.filepath, "\n", "├── dimensions: $dims", "\n", "├── $Noutputs outputs: ", prettykeys(ow.outputs), show_averaging_schedule(averaging_schedule), "\n", - "└── array type: ", show_array_type(ow.array_type)) + "└── array type: ", show_array_type(ow.array_type), "\n", + "├── file_splitting: ", summary(ow.file_splitting), "\n", + "└── file size: ", pretty_filesize(filesize(ow.filepath))) end ##### @@ -577,11 +593,16 @@ dictify(outputs::LagrangianParticles) = Dict("particles" => outputs) default_dimensions(outputs::Dict{String,<:LagrangianParticles}, grid, indices, with_halos) = Dict("particle_id" => collect(1:length(outputs["particles"]))) +##### +##### File splitting +##### + function start_next_file(model, ow::NetCDFOutputWriter) verbose = ow.verbose - sz = filesize(ow.filepath) + verbose && @info begin - "Filesize $(pretty_filesize(sz)) has exceeded maximum file size $(pretty_filesize(ow.max_filesize))." + schedule_type = summary(ow.file_splitting) + "Splitting output because $(schedule_type) is activated." 
end if ow.part == 1 diff --git a/src/OutputWriters/output_writer_utils.jl b/src/OutputWriters/output_writer_utils.jl index 7225d69182..b053e84e55 100644 --- a/src/OutputWriters/output_writer_utils.jl +++ b/src/OutputWriters/output_writer_utils.jl @@ -6,11 +6,52 @@ using Oceananigans.Fields: AbstractField, indices, boundary_conditions, instanti using Oceananigans.BoundaryConditions: bc_str, FieldBoundaryConditions, ContinuousBoundaryFunction, DiscreteBoundaryFunction using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, RungeKutta3TimeStepper using Oceananigans.Models.LagrangianParticleTracking: LagrangianParticles +using Oceananigans.Utils: AbstractSchedule ##### ##### Output writer utilities ##### +mutable struct FileSizeLimit <: AbstractSchedule + size_limit :: Float64 + path :: String +end + +""" + FileSizeLimit(size_limit [, path=""]) + +Return a schedule that actuates when the file at `path` exceeds +the `size_limit`. + +The `path` is automatically added and updated when `FileSizeLimit` is +used with an output writer, and should not be provided manually. 
+""" +FileSizeLimit(size_limit) = FileSizeLimit(size_limit, "") + +(fsl::FileSizeLimit)(model) = filesize(fsl.path) ≥ fsl.size_limit + +function Base.summary(fsl::FileSizeLimit) + current_size_str = pretty_filesize(filesize(fsl.path)) + size_limit_str = pretty_filesize(fsl.size_limit) + return string("FileSizeLimit(size_limit=", size_limit_str, + ", path=", fsl.path, " (", current_size_str, ")") +end + +Base.show(io::IO, fsl::FileSizeLimit) = print(io, summary(fsl)) + +# Update schedule based on user input +update_file_splitting_schedule!(schedule, filepath) = nothing + +function update_file_splitting_schedule!(schedule::FileSizeLimit, filepath) + schedule.path = filepath + return nothing +end + +struct NoFileSplitting end +(::NoFileSplitting)(model) = false +Base.summary(::NoFileSplitting) = "NoFileSplitting" +Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs)) + """ ext(ow) diff --git a/src/OutputWriters/windowed_time_average.jl b/src/OutputWriters/windowed_time_average.jl index 0a5c29b372..1a49615f48 100644 --- a/src/OutputWriters/windowed_time_average.jl +++ b/src/OutputWriters/windowed_time_average.jl @@ -61,8 +61,7 @@ to time-average its outputs before writing them to disk: ```jldoctest averaged_time_interval using Oceananigans -using Oceananigans.OutputWriters: JLD2OutputWriter -using Oceananigans.Utils: minutes +using Oceananigans.Units model = NonhydrostaticModel(grid=RectilinearGrid(size=(1, 1, 1), extent=(1, 1, 1))) @@ -78,7 +77,8 @@ JLD2OutputWriter scheduled on TimeInterval(4 days): ├── 3 outputs: (u, v, w) averaged on AveragedTimeInterval(window=2 days, stride=2, interval=4 days) ├── array type: Array{Float64} ├── including: [:grid, :coriolis, :buoyancy, :closure] -└── max filesize: Inf YiB +├── file_splitting: NoFileSplitting +└── file size: 26.7 KiB ``` """ function AveragedTimeInterval(interval; window=interval, stride=1) diff --git a/test/test_jld2_output_writer.jl b/test/test_jld2_output_writer.jl index 905a2fa087..0fdeb670c6 100644 
--- a/test/test_jld2_output_writer.jl +++ b/test/test_jld2_output_writer.jl @@ -49,7 +49,6 @@ function test_jld2_file_splitting(arch) function fake_bc_init(file, model) file["boundary_conditions/fake"] = π end - ow = JLD2OutputWriter(model, (; u=model.velocities.u); dir = ".", filename = "test.jld2", @@ -58,7 +57,7 @@ function test_jld2_file_splitting(arch) including = [:grid], array_type = Array{Float64}, with_halos = true, - max_filesize = 200KiB, + file_splitting = FileSizeLimit(200KiB), overwrite_existing = true) push!(simulation.output_writers, ow) @@ -70,6 +69,7 @@ function test_jld2_file_splitting(arch) @test filesize("test_part1.jld2") > 200KiB @test filesize("test_part2.jld2") > 200KiB @test filesize("test_part3.jld2") < 200KiB + @test !isfile("test_part4.jld2") for n in string.(1:3) filename = "test_part$n.jld2" diff --git a/test/test_netcdf_output_writer.jl b/test/test_netcdf_output_writer.jl index d7a59143ba..4bef67d293 100644 --- a/test/test_netcdf_output_writer.jl +++ b/test/test_netcdf_output_writer.jl @@ -52,8 +52,6 @@ function test_netcdf_file_splitting(arch) fake_attributes = Dict("fake_attribute"=>"fake_attribute") - max_filesize = 200KiB - ow = NetCDFOutputWriter(model, (; u=model.velocities.u); dir = ".", filename = "test.nc", @@ -61,7 +59,7 @@ function test_netcdf_file_splitting(arch) array_type = Array{Float64}, with_halos = true, global_attributes = fake_attributes, - max_filesize, + file_splitting = FileSizeLimit(200KiB), overwrite_existing = true) push!(simulation.output_writers, ow) @@ -70,9 +68,9 @@ function test_netcdf_file_splitting(arch) run!(simulation) # Test that files has been split according to size as expected. 
- @test filesize("test_part1.nc") > max_filesize - @test filesize("test_part2.nc") > max_filesize - @test filesize("test_part3.nc") < max_filesize + @test filesize("test_part1.nc") > 200KiB + @test filesize("test_part2.nc") > 200KiB + @test filesize("test_part3.nc") < 200KiB @test !isfile("test_part4.nc") for n in string.(1:3) diff --git a/validation/barotropic_gyre/barotropic_gyre.jl b/validation/barotropic_gyre/barotropic_gyre.jl index db641b63c5..c522983a7a 100644 --- a/validation/barotropic_gyre/barotropic_gyre.jl +++ b/validation/barotropic_gyre/barotropic_gyre.jl @@ -3,8 +3,6 @@ using Oceananigans using Oceananigans.Grids -using Oceananigans.Coriolis: HydrostaticSphericalCoriolis - using Oceananigans.Advection: EnergyConserving, EnstrophyConserving using Oceananigans.Models.HydrostaticFreeSurfaceModels: @@ -13,11 +11,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: ExplicitFreeSurface, ImplicitFreeSurface - -using Oceananigans.Utils: prettytime, hours, day, days, years -using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval - -using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid, GridFittedBoundary, GridFittedBottom +using Oceananigans.Units using Statistics using JLD2 diff --git a/validation/solid_body_rotation/solid_body_rotation.jl b/validation/solid_body_rotation/solid_body_rotation.jl index 98e52ed16f..c3811cd4b7 100644 --- a/validation/solid_body_rotation/solid_body_rotation.jl +++ b/validation/solid_body_rotation/solid_body_rotation.jl @@ -27,7 +27,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: ExplicitFreeSurface using Oceananigans.Utils: prettytime, hours -using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval using Statistics using JLD2 diff --git a/validation/solid_body_rotation/solid_body_tracer_advection.jl b/validation/solid_body_rotation/solid_body_tracer_advection.jl index 397aaccc54..fe05e17a4a 100644 --- 
a/validation/solid_body_rotation/solid_body_tracer_advection.jl +++ b/validation/solid_body_rotation/solid_body_tracer_advection.jl @@ -27,7 +27,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: PrescribedVelocityFields using Oceananigans.Utils: prettytime, hours -using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval using JLD2 using Printf