From 1104449089c823e5b2e6e52f9e297c504935ee08 Mon Sep 17 00:00:00 2001
From: "Gregory L. Wagner" <wagner.greg@gmail.com>
Date: Sun, 24 Mar 2024 04:15:55 -0700
Subject: [PATCH] Generalize file splitting for output writers (#3515)

* Generalize file splitting for JLD2OutputWriter so that alternative criterion to file size may be used

* Update src/OutputWriters/jld2_output_writer.jl

Co-authored-by: Navid C. Constantinou <navidcy@users.noreply.github.com>

* Export FileSizeLimit and update JLD2OutputWriter test

* implementing file splitting in netcdf

* Properly export FileSizeLimit

* fix handling of writer.filepath with file_splitting

* add support to file splitting by size in netCDFs

* add support to file splitting by size in netCDFs

* update warning to properly print variable.

Co-authored-by: Gregory L. Wagner <wagner.greg@gmail.com>

* return to the use of FileSizeLimit(200KiB) in the test to make it easier to read

* update netcdf to match jld2, and add return in update_file_splitting_schedule

* fix tests filesize tests

* Apply suggestions from code review

* Update test_jld2_output_writer.jl

* fix show for NetCDFOutputWriter

* fix doctests

* Update src/OutputWriters/netcdf_output_writer.jl

Co-authored-by: Gregory L. Wagner <wagner.greg@gmail.com>

* Update src/OutputWriters/jld2_output_writer.jl

Co-authored-by: Gregory L. Wagner <wagner.greg@gmail.com>

* fix doctests

* fix doctests

* fix doctest

* clean up unnecessary imports

* fix doctest

* fix doctests

* fix doctests

---------

Co-authored-by: Navid C. Constantinou <navidcy@users.noreply.github.com>
Co-authored-by: josuemtzmo <josue.mtzmo@gmail.com>
---
 benchmark/benchmark_multi_GPU.jl              |  1 -
 docs/src/model_setup/output_writers.md        | 20 ++++--
 src/Oceananigans.jl                           |  2 +-
 src/OutputWriters/OutputWriters.jl            |  3 +-
 src/OutputWriters/jld2_output_writer.jl       | 67 ++++++++++---------
 src/OutputWriters/netcdf_output_writer.jl     | 55 ++++++++++-----
 src/OutputWriters/output_writer_utils.jl      | 41 ++++++++++++
 src/OutputWriters/windowed_time_average.jl    |  6 +-
 test/test_jld2_output_writer.jl               |  4 +-
 test/test_netcdf_output_writer.jl             | 10 ++-
 validation/barotropic_gyre/barotropic_gyre.jl |  8 +--
 .../solid_body_rotation.jl                    |  1 -
 .../solid_body_tracer_advection.jl            |  1 -
 13 files changed, 143 insertions(+), 76 deletions(-)

diff --git a/benchmark/benchmark_multi_GPU.jl b/benchmark/benchmark_multi_GPU.jl
index 0abc8b21ca..39886efa3c 100644
--- a/benchmark/benchmark_multi_GPU.jl
+++ b/benchmark/benchmark_multi_GPU.jl
@@ -12,7 +12,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels:
     ExplicitFreeSurface
 
 using Oceananigans.Utils: prettytime, hours
-using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval
 
 using Oceananigans.MultiRegion
 using Oceananigans.TurbulenceClosures: VerticallyImplicitTimeDiscretization
diff --git a/docs/src/model_setup/output_writers.md b/docs/src/model_setup/output_writers.md
index a9e6d72536..20ce934d17 100644
--- a/docs/src/model_setup/output_writers.md
+++ b/docs/src/model_setup/output_writers.md
@@ -70,6 +70,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 2 outputs: (c, u)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 14.8 KiB
 ```
 
 ```jldoctest netcdf1
@@ -83,6 +85,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(1), zF(1), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 2 outputs: (c, u)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 14.8 KiB
 ```
 
 ```jldoctest netcdf1
@@ -98,6 +102,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(16), zF(17), xC(1), yF(1), xF(1), yC(1), time(0)
 ├── 2 outputs: (c, u) averaged on AveragedTimeInterval(window=20 seconds, stride=1, interval=1 minute)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 17.6 KiB
 ```
 
 `NetCDFOutputWriter` also accepts output functions that write scalars and arrays to disk,
@@ -148,6 +154,8 @@ NetCDFOutputWriter scheduled on IterationInterval(1):
 ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 3 outputs: (profile, slice, scalar)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 17.8 KiB
 ```
 
 See [`NetCDFOutputWriter`](@ref) for more information.
@@ -196,7 +204,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes):
 ├── 3 outputs: (u, v, w)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 27.4 KiB
 ```
 
 and a time- and horizontal-average of tracer `c` every 20 minutes of simulation time
@@ -213,7 +222,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes):
 ├── 1 outputs: c averaged on AveragedTimeInterval(window=5 minutes, stride=1, interval=20 minutes)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 17.5 KiB
 ```
 
 
@@ -239,7 +249,7 @@ time `interval`. The ``t_i`` specify both the end of the averaging window and th
 Building an `AveragedTimeInterval` that averages over a 1 day window, every 4 days,
 
 ```jldoctest averaged_time_interval
-using Oceananigans.OutputWriters: AveragedTimeInterval
+using Oceananigans
 using Oceananigans.Units
 
 schedule = AveragedTimeInterval(4days, window=1day)
@@ -253,7 +263,6 @@ to time-average its outputs before writing them to disk:
 
 ```jldoctest averaged_time_interval
 using Oceananigans
-using Oceananigans.OutputWriters: JLD2OutputWriter
 using Oceananigans.Units
 
 model = NonhydrostaticModel(grid=RectilinearGrid(size=(1, 1, 1), extent=(1, 1, 1)))
@@ -270,5 +279,6 @@ JLD2OutputWriter scheduled on TimeInterval(4 days):
 ├── 3 outputs: (u, v, w) averaged on AveragedTimeInterval(window=1 day, stride=2, interval=4 days)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 26.7 KiB
 ```
diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl
index 6e74ae3180..743f53a512 100644
--- a/src/Oceananigans.jl
+++ b/src/Oceananigans.jl
@@ -103,7 +103,7 @@ export
     # Output writers
     NetCDFOutputWriter, JLD2OutputWriter, Checkpointer,
     TimeInterval, IterationInterval, AveragedTimeInterval, SpecifiedTimes,
-    AndSchedule, OrSchedule, written_names,
+    FileSizeLimit, AndSchedule, OrSchedule, written_names,
 
     # Output readers
     FieldTimeSeries, FieldDataset, InMemory, OnDisk,
diff --git a/src/OutputWriters/OutputWriters.jl b/src/OutputWriters/OutputWriters.jl
index 1d5de0f6b5..0f66cee682 100644
--- a/src/OutputWriters/OutputWriters.jl
+++ b/src/OutputWriters/OutputWriters.jl
@@ -2,7 +2,7 @@ module OutputWriters
 
 export
     JLD2OutputWriter, NetCDFOutputWriter, written_names,
-    Checkpointer, WindowedTimeAverage,
+    Checkpointer, WindowedTimeAverage, FileSizeLimit,
     TimeInterval, IterationInterval, WallTimeInterval, AveragedTimeInterval
 
 using CUDA
@@ -15,6 +15,7 @@ using Oceananigans.Models
 using Oceananigans: AbstractOutputWriter
 using Oceananigans.Grids: interior_indices
 using Oceananigans.Utils: TimeInterval, IterationInterval, WallTimeInterval, instantiate
+using Oceananigans.Utils: pretty_filesize
 
 using OffsetArrays
 
diff --git a/src/OutputWriters/jld2_output_writer.jl b/src/OutputWriters/jld2_output_writer.jl
index 37eeaee203..1298a1c584 100644
--- a/src/OutputWriters/jld2_output_writer.jl
+++ b/src/OutputWriters/jld2_output_writer.jl
@@ -2,14 +2,14 @@ using Printf
 using JLD2
 using Oceananigans.Utils
 using Oceananigans.Models
-using Oceananigans.Utils: TimeInterval, pretty_filesize, prettykeys
+using Oceananigans.Utils: TimeInterval, prettykeys
 using Oceananigans.Fields: boundary_conditions, indices
 
 default_included_properties(::NonhydrostaticModel) = [:grid, :coriolis, :buoyancy, :closure]
 default_included_properties(::ShallowWaterModel) = [:grid, :coriolis, :closure]
 default_included_properties(::HydrostaticFreeSurfaceModel) = [:grid, :coriolis, :buoyancy, :closure]
 
-mutable struct JLD2OutputWriter{O, T, D, IF, IN, KW} <: AbstractOutputWriter
+mutable struct JLD2OutputWriter{O, T, D, IF, IN, FS, KW} <: AbstractOutputWriter
     filepath :: String
     outputs :: O
     schedule :: T
@@ -17,7 +17,7 @@ mutable struct JLD2OutputWriter{O, T, D, IF, IN, KW} <: AbstractOutputWriter
     init :: IF
     including :: IN
     part :: Int
-    max_filesize :: Float64
+    file_splitting :: FS
     overwrite_existing :: Bool
     verbose :: Bool
     jld2_kw :: KW
@@ -32,7 +32,7 @@ ext(::Type{JLD2OutputWriter}) = ".jld2"
                           indices = (:, :, :),
                        with_halos = false,
                        array_type = Array{Float64},
-                     max_filesize = Inf,
+                   file_splitting = NoFileSplitting(),
                overwrite_existing = false,
                              init = noinit,
                         including = [:grid, :coriolis, :buoyancy, :closure],
@@ -54,7 +54,7 @@ Keyword arguments
 ## Filenaming
 
 - `filename` (required): Descriptive filename. `".jld2"` is appended to `filename` in the file path
-                        if `filename` does not end in `".jld2"`.
+                         if `filename` does not end in `".jld2"`.
 
 - `dir`: Directory to save output to. Default: `"."` (current working directory).
 
@@ -80,10 +80,12 @@ Keyword arguments
 
 ## File management
 
-- `max_filesize`: The writer will stop writing to the output file once the file size exceeds `max_filesize`,
-                  and write to a new one with a consistent naming scheme ending in `part1`, `part2`, etc.
-                  Defaults to `Inf`.
-
+- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
+                    `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
+                    split the output file when its size exceeds `sz`. Another example is 
+                    `file_splitting = TimeInterval(30days)`, which will split files every 30 days of
+                    simulation time. The default incurs no splitting (`NoFileSplitting()`).
+                    
 - `overwrite_existing`: Remove existing files if their filenames conflict.
                         Default: `false`.
 
@@ -100,7 +102,7 @@ Keyword arguments
 - `verbose`: Log what the output writer is doing with statistics on compute/write times and file sizes.
              Default: `false`.
 
-- `part`: The starting part number used if `max_filesize` is finite.
+- `part`: The starting part number used when file splitting.
           Default: 1.
 
 - `jld2_kw`: Dict of kwargs to be passed to `jldopen` when data is written.
@@ -138,7 +140,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes):
 ├── 3 outputs: (u, v, w)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 27.4 KiB
 ```
 
 and a time- and horizontal-average of tracer ``c`` every 20 minutes of simulation time
@@ -155,7 +158,8 @@ JLD2OutputWriter scheduled on TimeInterval(20 minutes):
 ├── 1 outputs: c averaged on AveragedTimeInterval(window=5 minutes, stride=1, interval=20 minutes)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 17.5 KiB
 ```
 """
 function JLD2OutputWriter(model, outputs; filename, schedule,
@@ -163,7 +167,7 @@ function JLD2OutputWriter(model, outputs; filename, schedule,
                                indices = (:, :, :),
                             with_halos = false,
                             array_type = Array{Float64},
-                          max_filesize = Inf,
+                        file_splitting = NoFileSplitting(),
                     overwrite_existing = false,
                                   init = noinit,
                              including = default_included_properties(model),
@@ -174,8 +178,9 @@ function JLD2OutputWriter(model, outputs; filename, schedule,
     mkpath(dir)
     filename = auto_extension(filename, ".jld2")
     filepath = joinpath(dir, filename)
+    update_file_splitting_schedule!(file_splitting, filepath)
     overwrite_existing && isfile(filepath) && rm(filepath, force=true)
-    
+
     outputs = NamedTuple(Symbol(name) => construct_output(outputs[name], model.grid, indices, with_halos)
                          for name in keys(outputs))
 
@@ -183,9 +188,9 @@ function JLD2OutputWriter(model, outputs; filename, schedule,
     schedule, outputs = time_average_outputs(schedule, outputs, model)
 
     initialize_jld2_file!(filepath, init, jld2_kw, including, outputs, model)
-    
+
     return JLD2OutputWriter(filepath, outputs, schedule, array_type, init,
-                            including, part, max_filesize, overwrite_existing, verbose, jld2_kw)
+                            including, part, file_splitting, overwrite_existing, verbose, jld2_kw)
 end
 
 function initialize_jld2_file!(filepath, init, jld2_kw, including, outputs, model)
@@ -247,18 +252,17 @@ end
 function write_output!(writer::JLD2OutputWriter, model)
 
     verbose = writer.verbose
-    path = writer.filepath
     current_iteration = model.clock.iteration
 
     # Some logic to handle writing to existing files
-    if iteration_exists(path, current_iteration)
+    if iteration_exists(writer.filepath, current_iteration)
 
         if writer.overwrite_existing
             # Something went wrong, so we remove the file and re-initialize it.
-            rm(path, force=true)
+            rm(writer.filepath, force=true)
             initialize_jld2_file!(writer, model)
         else # nothing we can do since we were asked not to overwrite_existing, so we skip output writing
-            @warn "Iteration $current_iteration was found in $path. Skipping output writing (for now...)"
+            @warn "Iteration $current_iteration was found in $(writer.filepath). Skipping output writing (for now...)"
         end
 
     else # ok let's do this
@@ -271,16 +275,15 @@ function write_output!(writer::JLD2OutputWriter, model)
 
         verbose && @info "Fetching time: $(prettytime(tc))"
 
-        # Start a new file if the filesize exceeds max_filesize
-        filesize(path) >= writer.max_filesize && start_next_file(model, writer)
-        path = writer.filepath # we might have a new path...
-
+        # Start a new file if the file_splitting(model) is true
+        writer.file_splitting(model) && start_next_file(model, writer)
+        update_file_splitting_schedule!(writer.file_splitting, writer.filepath)
         # Write output from `data`
         verbose && @info "Writing JLD2 output $(keys(writer.outputs)) to $path..."
 
-        start_time, old_filesize = time_ns(), filesize(path)
-        jld2output!(path, model.clock.iteration, model.clock.time, data, writer.jld2_kw)
-        end_time, new_filesize = time_ns(), filesize(path)
+        start_time, old_filesize = time_ns(), filesize(writer.filepath)
+        jld2output!(writer.filepath, model.clock.iteration, model.clock.time, data, writer.jld2_kw)
+        end_time, new_filesize = time_ns(), filesize(writer.filepath)
 
         verbose && @info @sprintf("Writing done: time=%s, size=%s, Δsize=%s",
                                   prettytime((end_time - start_time) / 1e9),
@@ -311,9 +314,10 @@ end
 
 function start_next_file(model, writer::JLD2OutputWriter)
     verbose = writer.verbose
-    sz = filesize(writer.filepath)
+
     verbose && @info begin
-        "Filesize $(pretty_filesize(sz)) has exceeded maximum file size $(pretty_filesize(writer.max_filesize))."
+        schedule_type = summary(writer.file_splitting)
+        "Splitting output because $(schedule_type) is activated."
     end
 
     if writer.part == 1
@@ -329,7 +333,7 @@ function start_next_file(model, writer::JLD2OutputWriter)
     verbose && @info "Now writing to: $(writer.filepath)"
 
     initialize_jld2_file!(writer, model)
-    
+
     return nothing
 end
 
@@ -346,5 +350,6 @@ function Base.show(io::IO, ow::JLD2OutputWriter)
               "├── $Noutputs outputs: ", prettykeys(ow.outputs), show_averaging_schedule(averaging_schedule), "\n",
               "├── array type: ", show_array_type(ow.array_type), "\n",
               "├── including: ", ow.including, "\n",
-              "└── max filesize: ", pretty_filesize(ow.max_filesize))
+              "├── file_splitting: ", summary(ow.file_splitting), "\n",
+              "└── file size: ", pretty_filesize(filesize(ow.filepath)))
 end
diff --git a/src/OutputWriters/netcdf_output_writer.jl b/src/OutputWriters/netcdf_output_writer.jl
index 0c02e6f627..0fb468a117 100644
--- a/src/OutputWriters/netcdf_output_writer.jl
+++ b/src/OutputWriters/netcdf_output_writer.jl
@@ -10,7 +10,7 @@ using Oceananigans.Utils: versioninfo_with_gpu, oceananigans_versioninfo, pretty
 using Oceananigans.TimeSteppers: float_or_date_time
 using Oceananigans.Fields: reduced_dimensions, reduced_location, location, validate_indices
 
-mutable struct NetCDFOutputWriter{D, O, T, A} <: AbstractOutputWriter
+mutable struct NetCDFOutputWriter{D, O, T, A, FS} <: AbstractOutputWriter
     filepath :: String
     dataset :: D
     outputs :: O
@@ -24,7 +24,7 @@ mutable struct NetCDFOutputWriter{D, O, T, A} <: AbstractOutputWriter
     overwrite_existing :: Bool
     deflatelevel :: Int
     part :: Int
-    max_filesize :: Float64
+    file_splitting :: FS
     verbose :: Bool
 end
 
@@ -173,7 +173,7 @@ end
                            overwrite_existing = false,
                                  deflatelevel = 0,
                                          part = 1,
-                                 max_filesize = Inf,
+                               file_splitting = NoFileSplitting(),
                                       verbose = false)
 
 Construct a `NetCDFOutputWriter` that writes `(label, output)` pairs in `outputs` (which should
@@ -222,15 +222,19 @@ Keyword arguments
                   and 9 means maximum compression). See [NCDatasets.jl documentation](https://alexander-barth.github.io/NCDatasets.jl/stable/variables/#Creating-a-variable)
                   for more information.
 
-- `part`: The starting part number used if `max_filesize` is finite.
-          Default: 1.
-
-- `max_filesize`: The writer will stop writing to the output file once the file size exceeds `max_filesize`,
-                  and write to a new one with a consistent naming scheme ending in `part1`, `part2`, etc.
-                  Defaults to `Inf`.
+- `file_splitting`: Schedule for splitting the output file. The new files will be suffixed with
+                    `_part1`, `_part2`, etc. For example `file_splitting = FileSizeLimit(sz)` will
+                    split the output file when its size exceeds `sz`. Another example is
+                    `file_splitting = TimeInterval(30days)`, which will split files every 30 days of
+                    simulation time. The default incurs no splitting (`NoFileSplitting()`).
 
 ## Miscellaneous keywords
 
+- `verbose`: Log what the output writer is doing with statistics on compute/write times and file sizes.
+             Default: `false`.
+
+- `part`: The starting part number used when file splitting. Default: 1.
+
 - `global_attributes`: Dict of model properties to save with every file. Default: `Dict()`.
 
 - `output_attributes`: Dict of attributes to be saved with each field variable (reasonable
@@ -263,6 +267,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 2 outputs: (c, u)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 14.8 KiB
 ```
 
 ```jldoctest netcdf1
@@ -276,6 +282,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(1), zF(1), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 2 outputs: (c, u)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 14.8 KiB
 ```
 
 ```jldoctest netcdf1
@@ -291,6 +299,8 @@ NetCDFOutputWriter scheduled on TimeInterval(1 minute):
 ├── dimensions: zC(16), zF(17), xC(1), yF(1), xF(1), yC(1), time(0)
 ├── 2 outputs: (c, u) averaged on AveragedTimeInterval(window=20 seconds, stride=1, interval=1 minute)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 17.6 KiB
 ```
 
 `NetCDFOutputWriter` also accepts output functions that write scalars and arrays to disk,
@@ -341,6 +351,8 @@ NetCDFOutputWriter scheduled on IterationInterval(1):
 ├── dimensions: zC(16), zF(17), xC(16), yF(16), xF(16), yC(16), time(0)
 ├── 3 outputs: (profile, slice, scalar)
 └── array type: Array{Float64}
+├── file_splitting: NoFileSplitting
+└── file size: 17.8 KiB
 ```
 """
 function NetCDFOutputWriter(model, outputs; filename, schedule,
@@ -354,12 +366,14 @@ function NetCDFOutputWriter(model, outputs; filename, schedule,
                            overwrite_existing = nothing,
                                  deflatelevel = 0,
                                          part = 1,
-                                 max_filesize = Inf,
+                               file_splitting = NoFileSplitting(),
                                       verbose = false)
     mkpath(dir)
     filename = auto_extension(filename, ".nc")
     filepath = joinpath(dir, filename)
 
+    update_file_splitting_schedule!(file_splitting, filepath)
+
     if isnothing(overwrite_existing)
         if isfile(filepath)
             overwrite_existing = false
@@ -415,7 +429,7 @@ function NetCDFOutputWriter(model, outputs; filename, schedule,
                               overwrite_existing,
                               deflatelevel,
                               part,
-                              max_filesize,
+                              file_splitting,
                               verbose)
 end
 
@@ -485,9 +499,9 @@ Write output to netcdf file `output_writer.filepath` at specified intervals. Inc
 every time an output is written to the file.
 """
 function write_output!(ow::NetCDFOutputWriter, model)
-    # TODO allow user to split by number of snapshots, rathern than filesize.
-    # Start a new file if the filesize exceeds max_filesize
-    filesize(ow.filepath) ≥ ow.max_filesize && start_next_file(model, ow)
+    # Start a new file if the file_splitting(model) is true
+    ow.file_splitting(model) && start_next_file(model, ow)
+    update_file_splitting_schedule!(ow.file_splitting, ow.filepath)
 
     ow.dataset = open(ow)
 
@@ -556,7 +570,9 @@ function Base.show(io::IO, ow::NetCDFOutputWriter)
               "├── filepath: ", ow.filepath, "\n",
               "├── dimensions: $dims", "\n",
               "├── $Noutputs outputs: ", prettykeys(ow.outputs), show_averaging_schedule(averaging_schedule), "\n",
-              "└── array type: ", show_array_type(ow.array_type))
+              "└── array type: ", show_array_type(ow.array_type), "\n",
+              "├── file_splitting: ", summary(ow.file_splitting), "\n",
+              "└── file size: ", pretty_filesize(filesize(ow.filepath)))
 end
 
 #####
@@ -577,11 +593,16 @@ dictify(outputs::LagrangianParticles) = Dict("particles" => outputs)
 default_dimensions(outputs::Dict{String,<:LagrangianParticles}, grid, indices, with_halos) =
     Dict("particle_id" => collect(1:length(outputs["particles"])))
 
+#####
+##### File splitting
+#####
+
 function start_next_file(model, ow::NetCDFOutputWriter)
     verbose = ow.verbose
-    sz = filesize(ow.filepath)
+
     verbose && @info begin
-        "Filesize $(pretty_filesize(sz)) has exceeded maximum file size $(pretty_filesize(ow.max_filesize))."
+        schedule_type = summary(ow.file_splitting)
+        "Splitting output because $(schedule_type) is activated."
     end
 
     if ow.part == 1
diff --git a/src/OutputWriters/output_writer_utils.jl b/src/OutputWriters/output_writer_utils.jl
index 7225d69182..b053e84e55 100644
--- a/src/OutputWriters/output_writer_utils.jl
+++ b/src/OutputWriters/output_writer_utils.jl
@@ -6,11 +6,52 @@ using Oceananigans.Fields: AbstractField, indices, boundary_conditions, instanti
 using Oceananigans.BoundaryConditions: bc_str, FieldBoundaryConditions, ContinuousBoundaryFunction, DiscreteBoundaryFunction
 using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, RungeKutta3TimeStepper
 using Oceananigans.Models.LagrangianParticleTracking: LagrangianParticles
+using Oceananigans.Utils: AbstractSchedule
 
 #####
 ##### Output writer utilities
 #####
 
+mutable struct FileSizeLimit <: AbstractSchedule
+    size_limit :: Float64
+    path :: String
+end
+
+"""
+    FileSizeLimit(size_limit [, path=""])
+
+Return a schedule that actuates when the size of the file at `path`
+reaches or exceeds `size_limit` (in bytes).
+
+The `path` is automatically added and updated when `FileSizeLimit` is
+used with an output writer, and should not be provided manually.
+"""
+FileSizeLimit(size_limit) = FileSizeLimit(size_limit, "")
+
+(fsl::FileSizeLimit)(model) = filesize(fsl.path) ≥ fsl.size_limit
+
+function Base.summary(fsl::FileSizeLimit)
+    # Show the limit, the tracked path, and that file's current size.
+    current_size_str = pretty_filesize(filesize(fsl.path))
+    return string("FileSizeLimit(size_limit=", pretty_filesize(fsl.size_limit),
+                  ", path=", fsl.path, " (", current_size_str, "))") # balanced parens
+end
+
+Base.show(io::IO, fsl::FileSizeLimit) = print(io, summary(fsl))
+
+# Update schedule based on user input
+update_file_splitting_schedule!(schedule, filepath) = nothing
+
+function update_file_splitting_schedule!(schedule::FileSizeLimit, filepath) 
+    schedule.path = filepath
+    return nothing
+end 
+
+struct NoFileSplitting end
+(::NoFileSplitting)(model) = false
+Base.summary(::NoFileSplitting) = "NoFileSplitting" 
+Base.show(io::IO, nfs::NoFileSplitting) = print(io, summary(nfs))
+
 """
     ext(ow)
 
diff --git a/src/OutputWriters/windowed_time_average.jl b/src/OutputWriters/windowed_time_average.jl
index 0a5c29b372..1a49615f48 100644
--- a/src/OutputWriters/windowed_time_average.jl
+++ b/src/OutputWriters/windowed_time_average.jl
@@ -61,8 +61,7 @@ to time-average its outputs before writing them to disk:
 
 ```jldoctest averaged_time_interval
 using Oceananigans
-using Oceananigans.OutputWriters: JLD2OutputWriter
-using Oceananigans.Utils: minutes
+using Oceananigans.Units
 
 model = NonhydrostaticModel(grid=RectilinearGrid(size=(1, 1, 1), extent=(1, 1, 1)))
 
@@ -78,7 +77,8 @@ JLD2OutputWriter scheduled on TimeInterval(4 days):
 ├── 3 outputs: (u, v, w) averaged on AveragedTimeInterval(window=2 days, stride=2, interval=4 days)
 ├── array type: Array{Float64}
 ├── including: [:grid, :coriolis, :buoyancy, :closure]
-└── max filesize: Inf YiB
+├── file_splitting: NoFileSplitting
+└── file size: 26.7 KiB
 ```
 """
 function AveragedTimeInterval(interval; window=interval, stride=1)
diff --git a/test/test_jld2_output_writer.jl b/test/test_jld2_output_writer.jl
index 905a2fa087..0fdeb670c6 100644
--- a/test/test_jld2_output_writer.jl
+++ b/test/test_jld2_output_writer.jl
@@ -49,7 +49,6 @@ function test_jld2_file_splitting(arch)
     function fake_bc_init(file, model)
         file["boundary_conditions/fake"] = π
     end
-
     ow = JLD2OutputWriter(model, (; u=model.velocities.u);
                           dir = ".",
                           filename = "test.jld2",
@@ -58,7 +57,7 @@ function test_jld2_file_splitting(arch)
                           including = [:grid],
                           array_type = Array{Float64},
                           with_halos = true,
-                          max_filesize = 200KiB,
+                          file_splitting = FileSizeLimit(200KiB),
                           overwrite_existing = true)
 
     push!(simulation.output_writers, ow)
@@ -70,6 +69,7 @@ function test_jld2_file_splitting(arch)
     @test filesize("test_part1.jld2") > 200KiB
     @test filesize("test_part2.jld2") > 200KiB
     @test filesize("test_part3.jld2") < 200KiB
+    @test !isfile("test_part4.jld2")
 
     for n in string.(1:3)
         filename = "test_part$n.jld2"
diff --git a/test/test_netcdf_output_writer.jl b/test/test_netcdf_output_writer.jl
index d7a59143ba..4bef67d293 100644
--- a/test/test_netcdf_output_writer.jl
+++ b/test/test_netcdf_output_writer.jl
@@ -52,8 +52,6 @@ function test_netcdf_file_splitting(arch)
 
     fake_attributes = Dict("fake_attribute"=>"fake_attribute")
 
-    max_filesize = 200KiB
-
     ow = NetCDFOutputWriter(model, (; u=model.velocities.u);
                             dir = ".",
                             filename = "test.nc",
@@ -61,7 +59,7 @@ function test_netcdf_file_splitting(arch)
                             array_type = Array{Float64},
                             with_halos = true,
                             global_attributes = fake_attributes,
-                            max_filesize,
+                            file_splitting = FileSizeLimit(200KiB),
                             overwrite_existing = true)
 
     push!(simulation.output_writers, ow)
@@ -70,9 +68,9 @@ function test_netcdf_file_splitting(arch)
     run!(simulation)
 
     # Test that files has been split according to size as expected.
-    @test filesize("test_part1.nc") > max_filesize
-    @test filesize("test_part2.nc") > max_filesize
-    @test filesize("test_part3.nc") < max_filesize
+    @test filesize("test_part1.nc") > 200KiB
+    @test filesize("test_part2.nc") > 200KiB
+    @test filesize("test_part3.nc") < 200KiB
     @test !isfile("test_part4.nc")
 
     for n in string.(1:3)
diff --git a/validation/barotropic_gyre/barotropic_gyre.jl b/validation/barotropic_gyre/barotropic_gyre.jl
index db641b63c5..c522983a7a 100644
--- a/validation/barotropic_gyre/barotropic_gyre.jl
+++ b/validation/barotropic_gyre/barotropic_gyre.jl
@@ -3,8 +3,6 @@
 using Oceananigans
 using Oceananigans.Grids
 
-using Oceananigans.Coriolis: HydrostaticSphericalCoriolis
-
 using Oceananigans.Advection: EnergyConserving, EnstrophyConserving
 
 using Oceananigans.Models.HydrostaticFreeSurfaceModels:
@@ -13,11 +11,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels:
     ExplicitFreeSurface,
     ImplicitFreeSurface
 
-
-using Oceananigans.Utils: prettytime, hours, day, days, years
-using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval
-
-using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid, GridFittedBoundary, GridFittedBottom
+using Oceananigans.Units
 
 using Statistics
 using JLD2
diff --git a/validation/solid_body_rotation/solid_body_rotation.jl b/validation/solid_body_rotation/solid_body_rotation.jl
index 98e52ed16f..c3811cd4b7 100644
--- a/validation/solid_body_rotation/solid_body_rotation.jl
+++ b/validation/solid_body_rotation/solid_body_rotation.jl
@@ -27,7 +27,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels:
     ExplicitFreeSurface
 
 using Oceananigans.Utils: prettytime, hours
-using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval
 
 using Statistics
 using JLD2
diff --git a/validation/solid_body_rotation/solid_body_tracer_advection.jl b/validation/solid_body_rotation/solid_body_tracer_advection.jl
index 397aaccc54..fe05e17a4a 100644
--- a/validation/solid_body_rotation/solid_body_tracer_advection.jl
+++ b/validation/solid_body_rotation/solid_body_tracer_advection.jl
@@ -27,7 +27,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels:
     PrescribedVelocityFields
 
 using Oceananigans.Utils: prettytime, hours
-using Oceananigans.OutputWriters: JLD2OutputWriter, TimeInterval, IterationInterval
 
 using JLD2
 using Printf