Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests for splitting output files using TimeInterval #3523

Merged
merged 12 commits into from
Mar 28, 2024
2 changes: 1 addition & 1 deletion src/OutputWriters/OutputWriters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module OutputWriters

export
JLD2OutputWriter, NetCDFOutputWriter, written_names,
Checkpointer, WindowedTimeAverage, FileSizeLimit,
Checkpointer, WindowedTimeAverage, FileSizeLimit, FileTimeSplit,
TimeInterval, IterationInterval, WallTimeInterval, AveragedTimeInterval

using CUDA
Expand Down
4 changes: 2 additions & 2 deletions src/OutputWriters/jld2_output_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Keyword arguments
- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
`_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
split the output file when its size exceeds `sz`. Another example is
`file_splitting = TimeInterval(30days)`, which will split files every 30 days of
`file_splitting = FileTimeSplit(30days)`, which will split files every 30 days of
simulation time. The default incurs no splitting (`NoFileSplitting()`).

- `overwrite_existing`: Remove existing files if their filenames conflict.
Expand Down Expand Up @@ -276,7 +276,7 @@ function write_output!(writer::JLD2OutputWriter, model)
verbose && @info "Fetching time: $(prettytime(tc))"

# Start a new file if the file_splitting(model) is true
writer.file_splitting(model) && start_next_file(model, writer)
writer.file_splitting(model,writer) && start_next_file(model, writer)
update_file_splitting_schedule!(writer.file_splitting, writer.filepath)
# Write output from `data`
verbose && @info "Writing JLD2 output $(keys(writer.outputs)) to $path..."
Expand Down
4 changes: 2 additions & 2 deletions src/OutputWriters/netcdf_output_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ Keyword arguments
- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
`_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
split the output file when its size exceeds `sz`. Another example is
`file_splitting = TimeInterval(30days)`, which will split files every 30 days of
`file_splitting = FileTimeSplit(30days)`, which will split files every 30 days of
simulation time. The default incurs no splitting (`NoFileSplitting()`).

## Miscellaneous keywords
Expand Down Expand Up @@ -500,7 +500,7 @@ every time an output is written to the file.
"""
function write_output!(ow::NetCDFOutputWriter, model)
# Start a new file if the file_splitting(model) is true
ow.file_splitting(model) && start_next_file(model, ow)
ow.file_splitting(model,ow) && start_next_file(model, ow)
update_file_splitting_schedule!(ow.file_splitting, ow.filepath)

ow.dataset = open(ow)
Expand Down
45 changes: 40 additions & 5 deletions src/OutputWriters/output_writer_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ using Oceananigans.Utils: AbstractSchedule
##### Output writer utilities
#####

"""
    NoFileSplitting()

File-splitting schedule that never actuates, so output is never split across
files. This is the default `file_splitting` for the output writers.
"""
struct NoFileSplitting end

# The one-argument form is kept for existing callers. The output writers invoke
# `file_splitting(model, writer)`, so the two-argument form must exist as well;
# without it the default schedule would raise a `MethodError` on every write.
(::NoFileSplitting)(model) = false
(::NoFileSplitting)(model, writer) = false

Base.summary(::NoFileSplitting) = "NoFileSplitting"
Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs))


josuemtzmo marked this conversation as resolved.
Show resolved Hide resolved
mutable struct FileSizeLimit <: AbstractSchedule
size_limit :: Float64
path :: String
Expand Down Expand Up @@ -39,6 +45,40 @@ end

Base.show(io::IO, fsl::FileSizeLimit) = print(io, summary(fsl))


mutable struct FileTimeSplit <: AbstractSchedule
    interval :: Float64
    path :: String
end

"""
    FileTimeSplit(time_split [, path=""])

Return a file-splitting schedule that asks an output writer to start a new
file every `time_split` of simulation time. The value is stored in the
`interval` field.

`path` records the file currently being written. It is filled in through
`update_file_splitting_schedule!` when `FileTimeSplit` is used with an
output writer, and should not be provided manually.
"""
FileTimeSplit(time_split) = FileTimeSplit(time_split, "")

# Keep `path` pointing at the writer's current file. The writers call this after
# every split check; without this method the generic fallback would leave `path`
# empty forever.
function update_file_splitting_schedule!(schedule::FileTimeSplit, filepath)
    schedule.path = filepath
    return nothing
end

# Decide whether the writer should start a new file at the current iteration.
#
# NOTE(review): the check is expressed in iterations, not in model time. It
# reads `writer.schedule.interval`, so it presumably expects an
# `IterationInterval`-like writer schedule, and it compares `fts.interval`
# against iteration counts — confirm this is intended for Δt ≠ 1.
function (fts::FileTimeSplit)(model, writer)
    iteration = model.clock.iteration

    # Never split before the first time step has been taken.
    iteration == 0 && return false

    stride = writer.schedule.interval
    elapsed_strides = iteration / stride
    strides_per_file = fts.interval / stride

    return elapsed_strides % strides_per_file == 0
end

# One-line description of the schedule: its splitting interval, the file it
# currently tracks, and that file's size on disk.
function Base.summary(fts::FileTimeSplit)
    current_size_str = pretty_filesize(filesize(fts.path))
    return string("FileTimeSplit(interval=", fts.interval,
                  ", path=", fts.path, " (", current_size_str, "))")
end

# Dispatch on `FileTimeSplit`: `FileSizeLimit` already has its own `show` method.
Base.show(io::IO, fts::FileTimeSplit) = print(io, summary(fts))

# Generic fallback: schedules without a more specific method carry no
# per-file state, so there is nothing to update.
function update_file_splitting_schedule!(schedule, filepath)
    return nothing
end

Expand All @@ -47,11 +87,6 @@ function update_file_splitting_schedule!(schedule::FileSizeLimit, filepath)
return nothing
end

"""
    NoFileSplitting()

File-splitting schedule that never actuates, so output is never split across
files. This is the default `file_splitting` for the output writers.
"""
struct NoFileSplitting end

# The one-argument form is kept for existing callers. The output writers invoke
# `file_splitting(model, writer)`, so the two-argument form must exist as well;
# without it the default schedule would raise a `MethodError` on every write.
(::NoFileSplitting)(model) = false
(::NoFileSplitting)(model, writer) = false

Base.summary(::NoFileSplitting) = "NoFileSplitting"
Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs))

"""
ext(ow)

Expand Down
57 changes: 52 additions & 5 deletions test/test_jld2_output_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function jld2_sliced_field_output(model, outputs=model.velocities)
return size(u₁) == (2, 2, 4) && size(v₁) == (2, 2, 4) && size(w₁) == (2, 2, 5)
end

function test_jld2_file_splitting(arch)
function test_jld2_file_splitting_size(arch)
grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
simulation = Simulation(model, Δt=1, stop_iteration=10)
Expand Down Expand Up @@ -88,6 +88,52 @@ function test_jld2_file_splitting(arch)
return nothing
end

# Check that a `JLD2OutputWriter` using `FileTimeSplit` distributes its output
# over several `_partN` files, and that every part carries the `including`
# structs and the data written by the `init` function.
function test_jld2_file_splitting_time(arch)
    grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
    model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
    simulation = Simulation(model, Δt=1, stop_iteration=10)

    function fake_bc_init(file, model)
        file["boundary_conditions/fake"] = π
    end

    ow = JLD2OutputWriter(model, (; u=model.velocities.u);
                          dir = ".",
                          filename = "test.jld2",
                          schedule = IterationInterval(1),
                          init = fake_bc_init,
                          including = [:grid],
                          array_type = Array{Float64},
                          with_halos = true,
                          file_splitting = FileTimeSplit(3seconds),
                          overwrite_existing = true)

    push!(simulation.output_writers, ow)

    # Output is written every iteration and a new file is requested every
    # 3 seconds, so parts 1-3 are expected to hold 3 snapshots each; whatever
    # is left over ends up in part 4.
    run!(simulation)

    for n in string.(1:3)
        filename = "test_part$n.jld2"
        jldopen(filename, "r") do file
            # Test to make sure all files contain structs from `including`.
            @test file["grid/Nx"] == 16

            # Test to make sure all files contain the same number of snapshots.
            dimlength = length(file["timeseries/t"])
            @test dimlength == 3

            # Test to make sure all files contain info from `init` function.
            @test file["boundary_conditions/fake"] == π
        end

        # Leave test directory clean.
        rm(filename)
    end

    # The trailing, partially filled file is not inspected, only removed.
    rm("test_part4.jld2")

    return nothing
end

function test_jld2_time_averaging_of_horizontal_averages(model)

model.clock.iteration = 0
Expand Down Expand Up @@ -266,11 +312,12 @@ for arch in archs
test_field_slicing("sliced_funcs_jld2_test.jld2", ("u", "v", "w"), (4, 4, 4), (4, 4, 4), (4, 4, 5))
test_field_slicing("sliced_func_fields_jld2_test.jld2", ("αt", "background_u"), (2, 4, 4), (2, 4, 4))

#####
##### File splitting
#####
####
#### File splitting
####

test_jld2_file_splitting(arch)
test_jld2_file_splitting_size(arch)
test_jld2_file_splitting_time(arch)
josuemtzmo marked this conversation as resolved.
Show resolved Hide resolved

#####
##### Time-averaging
Expand Down
47 changes: 44 additions & 3 deletions test/test_netcdf_output_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function test_DateTime_netcdf_output(arch)
return nothing
end

function test_netcdf_file_splitting(arch)
function test_netcdf_file_splitting_size(arch)
grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
simulation = Simulation(model, Δt=1, stop_iteration=10)
Expand Down Expand Up @@ -90,6 +90,46 @@ function test_netcdf_file_splitting(arch)
return nothing
end

# Check that a `NetCDFOutputWriter` using `FileTimeSplit` distributes its output
# over several `_partN` files, and that every part carries the user-defined
# global attributes.
function test_netcdf_file_splitting_time(arch)
    grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
    model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))

    # `stop_iteration` is an iteration count, so pass a plain integer rather
    # than a time expressed in seconds.
    simulation = Simulation(model, Δt=1, stop_iteration=12)

    fake_attributes = Dict("fake_attribute"=>"fake_attribute")

    ow = NetCDFOutputWriter(model, (; u=model.velocities.u);
                            dir = ".",
                            filename = "test.nc",
                            schedule = IterationInterval(2),
                            array_type = Array{Float64},
                            with_halos = true,
                            global_attributes = fake_attributes,
                            file_splitting = FileTimeSplit(4seconds),
                            overwrite_existing = true)

    push!(simulation.output_writers, ow)

    # Output is written every 2 iterations and a new file is requested every
    # 4 seconds, so parts 1-3 are expected to hold 2 time records each.
    run!(simulation)

    for n in string.(1:3)
        filename = "test_part$n.nc"

        # The do-block closes the dataset even if something inside throws.
        NCDataset(filename, "r") do ds
            # Test that all files contain the same dimensions.
            @test length(ds["time"]) == 2

            # Test that all files contain the user defined attributes.
            @test ds.attrib["fake_attribute"] == "fake_attribute"
        end

        # Leave test directory clean.
        rm(filename)
    end

    # A trailing, partially filled file may have been started at the last
    # iteration; remove it if present.
    rm("test_part4.nc"; force=true)

    return nothing
end

function test_TimeDate_netcdf_output(arch)
grid = RectilinearGrid(arch, size=(1, 1, 1), extent=(1, 1, 1))
clock = Clock(time=TimeDate(2021, 1, 1))
Expand Down Expand Up @@ -880,7 +920,8 @@ for arch in archs
@testset "NetCDF output writer [$(typeof(arch))]" begin
@info " Testing NetCDF output writer [$(typeof(arch))]..."
test_DateTime_netcdf_output(arch)
test_netcdf_file_splitting(arch)
test_netcdf_file_splitting_size(arch)
test_netcdf_file_splitting_time(arch)
test_TimeDate_netcdf_output(arch)
test_thermal_bubble_netcdf_output(arch)
test_thermal_bubble_netcdf_output_with_halos(arch)
Expand All @@ -891,4 +932,4 @@ for arch in archs
test_netcdf_vertically_stretched_grid_output(arch)
test_netcdf_regular_lat_lon_grid_output(arch)
end
end
end
navidcy marked this conversation as resolved.
Show resolved Hide resolved