CliMA · josuemtzmo · Mar 28, 2024 · Mar 26, 2024 · Mar 26, 2024 · Mar 27, 2024
diff --git a/src/OutputWriters/OutputWriters.jl b/src/OutputWriters/OutputWriters.jl
@@ -2,7 +2,7 @@ module OutputWriters
 
 export
     JLD2OutputWriter, NetCDFOutputWriter, written_names,
-    Checkpointer, WindowedTimeAverage, FileSizeLimit,
+    Checkpointer, WindowedTimeAverage, FileSizeLimit, FileTimeSplit,
     TimeInterval, IterationInterval, WallTimeInterval, AveragedTimeInterval
 
 using CUDA

diff --git a/src/OutputWriters/jld2_output_writer.jl b/src/OutputWriters/jld2_output_writer.jl
@@ -83,7 +83,7 @@ Keyword arguments
 - `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
                     `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
                     split the output file when its size exceeds `sz`. Another example is 
-                    `file_splitting = TimeInterval(30days)`, which will split files every 30 days of
+                    `file_splitting = FileTimeSplit(30days)`, which will split files every 30 days of
                     simulation time. The default incurs no splitting (`NoFileSplitting()`).
 
 - `overwrite_existing`: Remove existing files if their filenames conflict.
@@ -276,7 +276,7 @@ function write_output!(writer::JLD2OutputWriter, model)
         verbose && @info "Fetching time: $(prettytime(tc))"
 
         # Start a new file if the file_splitting(model) is true
-        writer.file_splitting(model) && start_next_file(model, writer)
+        writer.file_splitting(model,writer) && start_next_file(model, writer)
         update_file_splitting_schedule!(writer.file_splitting, writer.filepath)
         # Write output from `data`
         verbose && @info "Writing JLD2 output $(keys(writer.outputs)) to $path..."

diff --git a/src/OutputWriters/netcdf_output_writer.jl b/src/OutputWriters/netcdf_output_writer.jl
@@ -225,7 +225,7 @@ Keyword arguments
 - `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
           `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
           split the output file when its size exceeds `sz`. Another example is 
-          `file_splitting = TimeInterval(30days)`, which will split files every 30 days of
+          `file_splitting = FileTimeSplit(30days)`, which will split files every 30 days of
           simulation time. The default incurs no splitting (`NoFileSplitting()`).
 
 ## Miscellaneous keywords
@@ -500,7 +500,7 @@ every time an output is written to the file.
 """
 function write_output!(ow::NetCDFOutputWriter, model)
     # Start a new file if the file_splitting(model) is true
-    ow.file_splitting(model) && start_next_file(model, ow)
+    ow.file_splitting(model,ow) && start_next_file(model, ow)
     update_file_splitting_schedule!(ow.file_splitting, ow.filepath)
 
     ow.dataset = open(ow)

diff --git a/src/OutputWriters/output_writer_utils.jl b/src/OutputWriters/output_writer_utils.jl
@@ -12,6 +12,12 @@ using Oceananigans.Utils: AbstractSchedule
 ##### Output writer utilities
 #####
 
+# Sentinel file-splitting "schedule" that never requests a split.
+struct NoFileSplitting end
+
+# Output writers invoke the splitting schedule as `schedule(model, writer)`.
+# `writer` is optional so that one-argument `schedule(model)` calls keep working.
+(::NoFileSplitting)(model, writer=nothing) = false
+Base.summary(::NoFileSplitting) = "NoFileSplitting"
+Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs))
+
+
 mutable struct FileSizeLimit <: AbstractSchedule
     size_limit :: Float64
     path :: String
@@ -39,6 +45,40 @@ end
 
 Base.show(io::IO, fsl::FileSizeLimit) = print(io, summary(fsl))
 
+
+# File-splitting schedule parameterized by a splitting interval.
+# Declared `mutable` (as `FileSizeLimit` is), so its fields can be reassigned.
+mutable struct FileTimeSplit <: AbstractSchedule
+    interval :: Float64  # splitting interval; presumably simulation seconds, TODO confirm units
+    path :: String       # output file path; "" when built with the one-argument constructor
+end
+
+# NOTE(review): the call operator for this type tests `model.clock.iteration`, not
+# `model.clock.time`, against `interval`; confirm which unit is intended.
+# NOTE(review): no `update_file_splitting_schedule!` method for `FileTimeSplit` is
+# visible in this file, so `path` may never be filled in by the output writers;
+# verify before relying on it.
+"""
+    FileTimeSplit(time_split [, path=""])
+
+Return a file-splitting schedule whose splitting interval is `time_split`.
+`time_split` is converted to `Float64` and stored in the `interval` field.
+
+The one-argument form sets `path` to the empty string.
+"""
+FileTimeSplit(time_split) = FileTimeSplit(time_split, "")
+
+# Decide whether the output file should be split at the current iteration.
+#
+# Output writers call this as `file_splitting(model, writer)`. Returns `true` when
+# `model.clock.iteration` is a nonzero multiple of `fts.interval`, else `false`.
+#
+# `writer` is accepted for call-site compatibility but is not used: scaling both
+# operands of the remainder by `writer.schedule.interval` does not change whether
+# the remainder is zero (in exact arithmetic), and reading that field would fail
+# for any schedule that has no `interval`.
+#
+# NOTE(review): this compares an iteration count with `fts.interval`, which the
+# docstring and tests express as a time. The two agree only when each iteration
+# advances the clock by one time unit; confirm the intended units.
+function (fts::FileTimeSplit)(model, writer)
+    iteration = model.clock.iteration
+
+    # Never split before the first time step has been taken.
+    iteration == 0 && return false
+
+    return iteration % fts.interval == 0
+end
+
+# One-line description of the schedule: its splitting interval, the stored path,
+# and the current size on disk of the file at that path.
+function Base.summary(fts::FileTimeSplit)
+    current_size_str = pretty_filesize(filesize(fts.path))
+
+    # The field is named `interval` (the struct has no `time_split` field), and it
+    # holds a duration rather than a byte count, so it is formatted with `prettytime`.
+    time_split_str = prettytime(fts.interval)
+
+    return string("FileTimeSplit(time_split=", time_split_str,
+                  ", path=", fts.path, " (", current_size_str, "))")
+end
+
+# Display a `FileTimeSplit` through its summary. The argument type must be
+# `FileTimeSplit`; `FileSizeLimit` already has its own `show` method.
+Base.show(io::IO, fts::FileTimeSplit) = print(io, summary(fts))
+
 # Update schedule based on user input
 update_file_splitting_schedule!(schedule, filepath) = nothing
 
@@ -47,11 +87,6 @@ function update_file_splitting_schedule!(schedule::FileSizeLimit, filepath)
     return nothing
 end 
 
-struct NoFileSplitting end
-(::NoFileSplitting)(model) = false
-Base.summary(::NoFileSplitting) = "NoFileSplitting" 
-Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs))
-
 """
     ext(ow)
 

diff --git a/test/test_jld2_output_writer.jl b/test/test_jld2_output_writer.jl
@@ -41,7 +41,7 @@ function jld2_sliced_field_output(model, outputs=model.velocities)
     return size(u₁) == (2, 2, 4) && size(v₁) == (2, 2, 4) && size(w₁) == (2, 2, 5)
 end
 
-function test_jld2_file_splitting(arch)
+function test_jld2_file_splitting_size(arch)
     grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
     model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
     simulation = Simulation(model, Δt=1, stop_iteration=10)
@@ -88,6 +88,52 @@ function test_jld2_file_splitting(arch)
     return nothing
 end
 
+# Check `FileTimeSplit`-based splitting of JLD2 output: run a short simulation,
+# then inspect and delete the part files it is expected to produce.
+function test_jld2_file_splitting_time(arch)
+    grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
+    model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
+    simulation = Simulation(model, Δt=1, stop_iteration=10)
+
+    # `init` hook that stores a marker value, so each part file can be checked for it.
+    function fake_bc_init(file, model)
+        file["boundary_conditions/fake"] = π
+    end
+    ow = JLD2OutputWriter(model, (; u=model.velocities.u);
+                          dir = ".",
+                          filename = "test.jld2",
+                          schedule = IterationInterval(1),
+                          init = fake_bc_init,
+                          including = [:grid],
+                          array_type = Array{Float64},
+                          with_halos = true,
+                          file_splitting = FileTimeSplit(3seconds),
+                          overwrite_existing = true)
+
+    push!(simulation.output_writers, ow)
+
+    # Output is written every iteration and split with `FileTimeSplit(3seconds)`:
+    # parts 1-3 are each expected to hold 3 snapshots, and a fourth part is
+    # expected to exist as well (it is removed at the end).
+    run!(simulation)
+
+    for n in string.(1:3)
+        filename = "test_part$n.jld2"
+        jldopen(filename, "r") do file
+            # Test to make sure all files contain structs from `including`.
+            @test file["grid/Nx"] == 16
+
+            # Test to make sure all files contain the same number of snapshots.
+            dimlength = length(file["timeseries/t"])
+            @test dimlength == 3
+
+            # Test to make sure all files contain info from `init` function.
+            @test file["boundary_conditions/fake"] == π
+        end
+
+        # Leave test directory clean.
+        rm(filename)
+    end
+    # The fourth part is not inspected, only cleaned up.
+    rm("test_part4.jld2")
+
+    return nothing
+end
+
 function test_jld2_time_averaging_of_horizontal_averages(model)
 
     model.clock.iteration = 0
@@ -266,11 +312,12 @@ for arch in archs
         test_field_slicing("sliced_funcs_jld2_test.jld2", ("u", "v", "w"), (4, 4, 4), (4, 4, 4), (4, 4, 5))
         test_field_slicing("sliced_func_fields_jld2_test.jld2", ("αt", "background_u"), (2, 4, 4), (2, 4, 4))
 
-        #####
-        ##### File splitting
-        #####
+        ####
+        #### File splitting
+        ####
 
-        test_jld2_file_splitting(arch)
+        test_jld2_file_splitting_size(arch)
+        test_jld2_file_splitting_time(arch)
 
         #####
         ##### Time-averaging

diff --git a/test/test_netcdf_output_writer.jl b/test/test_netcdf_output_writer.jl
@@ -45,7 +45,7 @@ function test_DateTime_netcdf_output(arch)
     return nothing
 end
 
-function test_netcdf_file_splitting(arch)
+function test_netcdf_file_splitting_size(arch)
     grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
     model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
     simulation = Simulation(model, Δt=1, stop_iteration=10)
@@ -90,6 +90,46 @@ function test_netcdf_file_splitting(arch)
     return nothing
 end
 
+# Check `FileTimeSplit`-based splitting of NetCDF output: run a short simulation,
+# then inspect and delete the part files it is expected to produce.
+function test_netcdf_file_splitting_time(arch)
+    grid = RectilinearGrid(arch, size=(16, 16, 16), extent=(1, 1, 1), halo=(1, 1, 1))
+    model = NonhydrostaticModel(; grid, buoyancy=SeawaterBuoyancy(), tracers=(:T, :S))
+    # `stop_iteration` counts iterations, so pass a plain integer rather than a time.
+    simulation = Simulation(model, Δt=1, stop_iteration=12)
+
+    fake_attributes = Dict("fake_attribute"=>"fake_attribute")
+
+    ow = NetCDFOutputWriter(model, (; u=model.velocities.u);
+                            dir = ".",
+                            filename = "test.nc",
+                            schedule = IterationInterval(2),
+                            array_type = Array{Float64},
+                            with_halos = true,
+                            global_attributes = fake_attributes,
+                            file_splitting = FileTimeSplit(4seconds),
+                            overwrite_existing = true)
+
+    push!(simulation.output_writers, ow)
+
+    # Output is written every 2 iterations and split with `FileTimeSplit(4seconds)`:
+    # parts 1-3 are each expected to hold 2 time records, and the final write may
+    # start a fourth part.
+    run!(simulation)
+
+    for n in string.(1:3)
+        filename = "test_part$n.nc"
+
+        # The `do` form closes the dataset even if the body throws.
+        NCDataset(filename, "r") do ds
+            # Test that all files contain the same dimensions.
+            @test length(ds["time"]) == 2
+            # Test that all files contain the user defined attributes.
+            @test ds.attrib["fake_attribute"] == "fake_attribute"
+        end
+
+        # Leave test directory clean.
+        rm(filename)
+    end
+
+    # `force=true` so cleanup does not error if no fourth part was written.
+    rm("test_part4.nc", force=true)
+
+    return nothing
+end
+
 function test_TimeDate_netcdf_output(arch)
     grid = RectilinearGrid(arch, size=(1, 1, 1), extent=(1, 1, 1))
     clock = Clock(time=TimeDate(2021, 1, 1))
@@ -880,7 +920,8 @@ for arch in archs
     @testset "NetCDF output writer [$(typeof(arch))]" begin
         @info "  Testing NetCDF output writer [$(typeof(arch))]..."
         test_DateTime_netcdf_output(arch)
-        test_netcdf_file_splitting(arch)
+        test_netcdf_file_splitting_size(arch)
+        test_netcdf_file_splitting_time(arch)
         test_TimeDate_netcdf_output(arch)
         test_thermal_bubble_netcdf_output(arch)
         test_thermal_bubble_netcdf_output_with_halos(arch)
@@ -891,4 +932,4 @@ for arch in archs
         test_netcdf_vertically_stretched_grid_output(arch)
         test_netcdf_regular_lat_lon_grid_output(arch)
     end
-end
+end