diff --git a/ext/OceananigansEnzymeExt.jl b/ext/OceananigansEnzymeExt.jl index ce1662d32f..c3b4954de8 100644 --- a/ext/OceananigansEnzymeExt.jl +++ b/ext/OceananigansEnzymeExt.jl @@ -228,7 +228,7 @@ function EnzymeCore.EnzymeRules.augmented_primal(config, include_right_boundaries = false, reduced_dimensions = (), location = nothing, - only_active_cells = nothing, + active_cells_map = nothing, kwargs...) where N @@ -239,8 +239,8 @@ function EnzymeCore.EnzymeRules.augmented_primal(config, offset = Oceananigans.Utils.offsets(workspec.val) - if !isnothing(only_active_cells) - workgroup, worksize = Oceananigans.Utils.active_cells_work_layout(workgroup, worksize, only_active_cells, grid.val) + if !isnothing(active_cells_map) + workgroup, worksize = Oceananigans.Utils.active_cells_work_layout(workgroup, worksize, active_cells_map, grid.val) offset = nothing end @@ -286,7 +286,7 @@ function EnzymeCore.EnzymeRules.reverse(config::EnzymeCore.EnzymeRules.ConfigWid include_right_boundaries = false, reduced_dimensions = (), location = nothing, - only_active_cells = nothing, + active_cells_map = nothing, kwargs...) where N subrets = if tape !== nothing diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index b01f2471ff..86cc43229b 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -1,6 +1,31 @@ using Oceananigans.Operators: Vᶜᶜᶜ using Oceananigans.Fields: ZeroField +struct TracerAdvection{N, FT, A, B, C} <: AbstractAdvectionScheme{N, FT} + x :: A + y :: B + z :: C + + TracerAdvection{N, FT}(x::A, y::B, z::C) where {N, FT, A, B, C} = new{N, FT, A, B, C}(x, y, z) +end + +""" + function TracerAdvection(x, y, z) + +Builds a `TracerAdvection` type with reconstructions schemes `x`, `y`, and `z` to be applied in +the x, y, and z direction, respectively. +""" +function TracerAdvection(x_advection, y_advection, z_advection) + Hx = required_halo_size(x_advection) + Hy = required_halo_size(y_advection) + Hz = required_halo_size(z_advection) + + FT = eltype(x_advection) + H = max(Hx, Hy, Hz) + + return TracerAdvection{H, FT}(x_advection, y_advection, z_advection) +end + @inline _advective_tracer_flux_x(args...) = advective_tracer_flux_x(args...) @inline _advective_tracer_flux_y(args...) = advective_tracer_flux_y(args...) @inline _advective_tracer_flux_z(args...) = advective_tracer_flux_z(args...) @@ -32,3 +57,9 @@ which ends up at the location `ccc`. δyᵃᶜᵃ(i, j, k, grid, _advective_tracer_flux_y, advection, U.v, c) + δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection, U.w, c)) end + +@inline function div_Uc(i, j, k, grid, advection::TracerAdvection, U, c) + return 1/Vᶜᶜᶜ(i, j, k, grid) * (δxᶜᵃᵃ(i, j, k, grid, _advective_tracer_flux_x, advection.x, U.u, c) + + δyᵃᶜᵃ(i, j, k, grid, _advective_tracer_flux_y, advection.y, U.v, c) + + δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection.z, U.w, c)) +end diff --git a/src/Advection/vector_invariant_advection.jl b/src/Advection/vector_invariant_advection.jl index 66b543dada..7530e1788c 100644 --- a/src/Advection/vector_invariant_advection.jl +++ b/src/Advection/vector_invariant_advection.jl @@ -111,9 +111,9 @@ Vector Invariant, Dimension-by-dimension reconstruction function VectorInvariant(; vorticity_scheme = EnstrophyConserving(), vorticity_stencil = VelocityStencil(), vertical_scheme = EnergyConserving(), - kinetic_energy_gradient_scheme = vertical_scheme, divergence_scheme = vertical_scheme, - upwinding = OnlySelfUpwinding(; cross_scheme = vertical_scheme), + kinetic_energy_gradient_scheme = divergence_scheme, + upwinding = OnlySelfUpwinding(; cross_scheme = divergence_scheme), multi_dimensional_stencil = false) N = required_halo_size(vorticity_scheme) @@ -144,10 +144,10 @@ const VectorInvariantKEGradientEnergyConserving = VectorInvariant{<:Any, <:Any, const VectorInvariantKineticEnergyUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme} -# VectorInvariant{N, FT, M, Z, ZS, V, K, D, U (upwinding) -const VectorInvariantCrossVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:CrossAndSelfUpwinding} -const VectorInvariantSelfVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:OnlySelfUpwinding} -const VectorInvariantVelocityVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:VelocityUpwinding} +# VectorInvariant{N, FT, M, Z, ZS, V, K, D, U (upwinding) +const VectorInvariantCrossVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:CrossAndSelfUpwinding} +const VectorInvariantSelfVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:OnlySelfUpwinding} +const VectorInvariantVelocityVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:VelocityUpwinding} Base.summary(a::VectorInvariant) = string("Vector Invariant, Dimension-by-dimension reconstruction") Base.summary(a::MultiDimensionalVectorInvariant) = string("Vector Invariant, Multidimensional reconstruction") @@ -166,10 +166,7 @@ Base.show(io::IO, a::VectorInvariant{N, FT}) where {N, FT} = ##### Convenience for WENO Vector Invariant ##### -# VectorInvariant{N, FT, M, Z (vorticity scheme), ZS, V (vertical scheme), K (kinetic energy gradient scheme) -const WENOVectorInvariant = VectorInvariant{<:Any, <:Any, <:Any, <:WENO, <:Any, <:WENO, <:WENO} - -nothing_to_default(user_value, default) = isnothing(user_value) ? default : user_value +nothing_to_default(user_value; default) = isnothing(user_value) ? default : user_value """ WENOVectorInvariant(; upwinding = nothing, @@ -188,23 +185,23 @@ function WENOVectorInvariant(; upwinding = nothing, weno_kw...) if isnothing(order) # apply global defaults - vorticity_order = nothing_to_default(vorticity_order, default=9) - vertical_order = nothing_to_default(vertical_order, default=5) - divergence_order = nothing_to_default(divergence_order, default=5) - kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default=5) + vorticity_order = nothing_to_default(vorticity_order, default = 9) + vertical_order = nothing_to_default(vertical_order, default = 5) + divergence_order = nothing_to_default(divergence_order, default = 5) + kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default = 5) else # apply user supplied `order` unless overridden by more specific value - vorticity_order = nothing_to_default(vorticity_order, default=order) - vertical_order = nothing_to_default(vertical_order, default=order) - divergence_order = nothing_to_default(divergence_order, default=order) - kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default=order) + vorticity_order = nothing_to_default(vorticity_order, default = order) + vertical_order = nothing_to_default(vertical_order, default = order) + divergence_order = nothing_to_default(divergence_order, default = order) + kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default = order) end - vorticity_scheme = WENO(; order=vorticity_order, weno_kw...) - vertical_scheme = WENO(; order=vertical_order, weno_kw...) - kinetic_energy_gradient_scheme = WENO(; order=kinetic_energy_gradient_order, weno_kw...) - divergence_scheme = WENO(; order=divergence_order, weno_kw...) + vorticity_scheme = WENO(; order = vorticity_order, weno_kw...) + vertical_scheme = WENO(; order = vertical_order, weno_kw...) + kinetic_energy_gradient_scheme = WENO(; order = kinetic_energy_gradient_order, weno_kw...) + divergence_scheme = WENO(; order = divergence_order, weno_kw...) - default_upwinding = OnlySelfUpwinding(cross_scheme=divergence_scheme) + default_upwinding = OnlySelfUpwinding(cross_scheme = divergence_scheme) upwinding = nothing_to_default(upwinding; default = default_upwinding) schemes = (vorticity_scheme, vertical_scheme, kinetic_energy_gradient_scheme, divergence_scheme) diff --git a/src/Architectures.jl b/src/Architectures.jl index 6dbc3cf2b1..78b55a067a 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -115,4 +115,10 @@ end @inline unsafe_free!(a::CuArray) = CUDA.unsafe_free!(a) @inline unsafe_free!(a) = nothing +# Convert arguments to GPU-compatible types +@inline convert_args(::CPU, args) = args +@inline convert_args(::GPU, args) = CUDA.cudaconvert(args) +@inline convert_args(::GPU, args::Tuple) = map(CUDA.cudaconvert, args) + end # module + diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 055bbb93b1..2aa2a53e45 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -2,7 +2,7 @@ using Oceananigans.Architectures using Oceananigans.Grids: topology, validate_tupled_argument using CUDA: ndevices, device! -import Oceananigans.Architectures: device, cpu_architecture, arch_array, array_type, child_architecture +import Oceananigans.Architectures: device, cpu_architecture, arch_array, array_type, child_architecture, convert_args import Oceananigans.Grids: zeros import Oceananigans.Utils: sync_device!, tupleit @@ -210,7 +210,7 @@ function Distributed(child_architecture = CPU(); partition = Partition(MPI.Comm_size(communicator))) if !(MPI.Initialized()) - @info "MPI has not been initialized, so we are calling MPI.Init()". + @info "MPI has not been initialized, so we are calling MPI.Init()." MPI.Init() end @@ -265,6 +265,7 @@ arch_array(arch::Distributed, A) = arch_array(child_architecture(arch), A) zeros(FT, arch::Distributed, N...) = zeros(FT, child_architecture(arch), N...) array_type(arch::Distributed) = array_type(child_architecture(arch)) sync_device!(arch::Distributed) = sync_device!(arch.child_architecture) +convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) cpu_architecture(arch::DistributedCPU) = arch cpu_architecture(arch::Distributed{A, S}) where {A, S} = diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index ebffdf4f76..878646662e 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -39,16 +39,19 @@ end function set!(u::DistributedField, v::Union{Array, CuArray}) gsize = global_size(architecture(u), size(u)) - if size(v) == size(u) - f = arch_array(architecture(u), v) - u .= f - return u - elseif size(v) == gsize + if size(v) == gsize f = partition_global_array(architecture(u), v, size(u)) u .= f return u else - throw(ArgumentError("ERROR: DimensionMismatch: array could not be set to match destination field")) + try + f = arch_array(architecture(u), v) + u .= f + return u + + catch + throw(ArgumentError("ERROR: DimensionMismatch: array could not be set to match destination field")) + end end end diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 6cd12eeb95..90a3d4b80a 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -41,8 +41,9 @@ end @inline local_sizes(N, R::Fractional) = Tuple(ceil(Int, N * r) for r in R.sizes) @inline function local_sizes(N, R::Sizes) if N != sum(R.sizes) - @warn "The domain size specified in the architecture $(R.sizes) is inconsistent - with the grid size $N: using the architecture-specified size" + @warn "The Sizes specified in the architecture $(R.sizes) is inconsistent + with the grid size: (N = $N != sum(Sizes) = $(sum(R.sizes))). + Using $(R.sizes)..." end return R.sizes end @@ -80,9 +81,9 @@ function RectilinearGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - xl = partition(x, nx, arch, 1) - yl = partition(y, ny, arch, 2) - zl = partition(z, nz, arch, 3) + xl = Rx == 1 ? x : partition_coordinate(x, nx, arch, 1) + yl = Ry == 1 ? y : partition_coordinate(y, ny, arch, 2) + zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) Lx, xᶠᵃᵃ, xᶜᵃᵃ, Δxᶠᵃᵃ, Δxᶜᵃᵃ = generate_coordinate(FT, topology[1](), nx, Hx, xl, :x, child_architecture(arch)) Ly, yᵃᶠᵃ, yᵃᶜᵃ, Δyᵃᶠᵃ, Δyᵃᶜᵃ = generate_coordinate(FT, topology[2](), ny, Hy, yl, :y, child_architecture(arch)) @@ -127,9 +128,9 @@ function LatitudeLongitudeGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - λl = partition(longitude, nλ, arch, 1) - φl = partition(latitude, nφ, arch, 2) - zl = partition(z, nz, arch, 3) + λl = Rx == 1 ? longitude : partition_coordinate(longitude, nλ, arch, 1) + φl = Ry == 1 ? latitude : partition_coordinate(latitude, nφ, arch, 2) + zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, @@ -186,9 +187,9 @@ function reconstruct_global_grid(grid::DistributedRectilinearGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - xG = Rx == 1 ? x : assemble(x, nx, Rx, ri, rj, rk, arch.communicator) - yG = Ry == 1 ? y : assemble(y, ny, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble(z, nz, Rz, rk, ri, rj, arch.communicator) + xG = Rx == 1 ? x : assemble_coordinate(x, nx, Rx, ri, rj, rk, arch.communicator) + yG = Ry == 1 ? y : assemble_coordinate(y, ny, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) @@ -229,9 +230,9 @@ function reconstruct_global_grid(grid::DistributedLatitudeLongitudeGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - λG = Rx == 1 ? λ : assemble(λ, nλ, Rx, ri, rj, rk, arch.communicator) - φG = Ry == 1 ? φ : assemble(φ, nφ, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble(z, nz, Rz, rk, ri, rj, arch.communicator) + λG = Rx == 1 ? λ : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) + φG = Ry == 1 ? φ : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) @@ -340,4 +341,4 @@ function reconstruct_global_topology(T, R, r, r1, r2, comm) else return Bounded end -end +end \ No newline at end of file diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 87865c75ab..06264dd39c 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -142,11 +142,10 @@ end # corner passing routine function fill_corners!(c, connectivity, indices, loc, arch, grid, buffers, args...; async = false, only_local_halos = false, kwargs...) - if only_local_halos # No corner filling needed! - return nothing - end + # No corner filling needed! + only_local_halos && return nothing - # This has to be synchronized!! + # This has to be synchronized! fill_send_buffers!(c, buffers, grid, Val(:corners)) sync_device!(arch) @@ -189,7 +188,6 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib if !only_local_halos # Then we need to fill the `send` buffers fill_send_buffers!(c, buffers, grid, Val(buffer_side)) - sync_device!(arch) end # Calculate size and offset of the fill_halo kernel @@ -201,7 +199,7 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) pool_requests_or_complete_comm!(c, arch, grid, buffers, requests, async, buffer_side) - + return nothing end @@ -245,6 +243,8 @@ for (side, opposite_side) in zip([:west, :south], [:east, :north]) grid::DistributedGrid, buffers, args...; only_local_halos = false, kwargs...) only_local_halos && return nothing + + sync_device!(arch) @assert bc_side.condition.from == bc_opposite_side.condition.from # Extra protection in case of bugs local_rank = bc_side.condition.from @@ -275,11 +275,12 @@ for side in [:west, :east, :south, :north] only_local_halos && return nothing + sync_device!(arch) + child_arch = child_architecture(arch) local_rank = bc_side.condition.from recv_req = $recv_and_fill_side_halo!(c, grid, arch, loc, local_rank, bc_side.condition.to, buffers) - send_req = $send_side_halo(c, grid, arch, loc, local_rank, bc_side.condition.to, buffers) return [send_req, recv_req] diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index 1010c0f6bf..c8b90e3cbb 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -37,14 +37,19 @@ end # Partitioning (localization of global objects) and assembly (global assembly of local objects) # Used for grid constructors (cpu_face_constructor_x, cpu_face_constructor_y, cpu_face_constructor_z) -# which means that we need to repeat the value at the right boundary -function partition(c::AbstractVector, n, arch, idx) +# We need to repeat the value at the right boundary +function partition_coordinate(c::AbstractVector, n, arch, idx) nl = concatenate_local_sizes(n, arch, idx) r = arch.local_index[idx] - return c[1 + sum(nl[1:r-1]) : sum(nl[1:r])] + # Allow for Face values + if r == arch.ranks[idx] + return c[1 + sum(nl[1:r-1]) : end] + else + return c[1 + sum(nl[1:r-1]) : sum(nl[1:r])] + end end -function partition(c::Tuple, n, arch, idx) +function partition_coordinate(c::Tuple, n, arch, idx) nl = concatenate_local_sizes(n, arch, idx) N = sum(nl) R = arch.ranks[idx] @@ -60,14 +65,14 @@ function partition(c::Tuple, n, arch, idx) end """ - assemble(c::AbstractVector, n, R, r, r1, r2, comm) + assemble_coordinate(c::AbstractVector, n, R, r, r1, r2, comm) Builds a linear global coordinate vector given a local coordinate vector `c_local` a local number of elements `Nc`, number of ranks `Nr`, rank `r`, and `arch`itecture. Since we use a global reduction, only ranks at positions 1 in the other two directions `r1 == 1` and `r2 == 1` fill the 1D array. """ -function assemble(c_local::AbstractVector, n, R, r, r1, r2, comm) +function assemble_coordinate(c_local::AbstractVector, n, R, r, r1, r2, comm) nl = concatenate_local_sizes(n, R, r) c_global = zeros(eltype(c_local), sum(nl)+1) @@ -83,7 +88,7 @@ function assemble(c_local::AbstractVector, n, R, r, r1, r2, comm) end # Simple case, just take the first and the last core -function assemble(c::Tuple, n, R, r, r1, r2, comm) +function assemble_coordinate(c::Tuple, n, R, r, r1, r2, comm) c_global = zeros(Float64, 2) if r == 1 && r1 == 1 && r2 == 1 diff --git a/src/Grids/grid_generation.jl b/src/Grids/grid_generation.jl index b1480aa1f3..98bd421d43 100644 --- a/src/Grids/grid_generation.jl +++ b/src/Grids/grid_generation.jl @@ -13,6 +13,7 @@ get_face_node(coord::Function, i) = coord(i) get_face_node(coord::AbstractVector, i) = CUDA.@allowscalar coord[i] const AT = AbstractTopology + lower_exterior_Δcoordᶠ(::AT, Fi, Hcoord) = [Fi[end - Hcoord + i] - Fi[end - Hcoord + i - 1] for i = 1:Hcoord] lower_exterior_Δcoordᶠ(::BoundedTopology, Fi, Hcoord) = [Fi[2] - Fi[1] for _ = 1:Hcoord] diff --git a/src/ImmersedBoundaries/ImmersedBoundaries.jl b/src/ImmersedBoundaries/ImmersedBoundaries.jl index 49d49d3934..f13bdabb4b 100644 --- a/src/ImmersedBoundaries/ImmersedBoundaries.jl +++ b/src/ImmersedBoundaries/ImmersedBoundaries.jl @@ -102,18 +102,19 @@ abstract type AbstractImmersedBoundary end ##### ImmersedBoundaryGrid ##### -struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, Arch} <: AbstractGrid{FT, TX, TY, TZ, Arch} +struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, S, Arch} <: AbstractGrid{FT, TX, TY, TZ, Arch} architecture :: Arch underlying_grid :: G immersed_boundary :: I interior_active_cells :: M - + active_z_columns :: S + # Internal interface - function ImmersedBoundaryGrid{TX, TY, TZ}(grid::G, ib::I, mi::M) where {TX, TY, TZ, G <: AbstractUnderlyingGrid, I, M} + function ImmersedBoundaryGrid{TX, TY, TZ}(grid::G, ib::I, mi::M, ms::S) where {TX, TY, TZ, G <: AbstractUnderlyingGrid, I, M, S} FT = eltype(grid) arch = architecture(grid) Arch = typeof(arch) - return new{FT, TX, TY, TZ, G, I, M, Arch}(arch, grid, ib, mi) + return new{FT, TX, TY, TZ, G, I, M, S, Arch}(arch, grid, ib, mi, ms) end # Constructor with no active map @@ -121,7 +122,7 @@ struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, Arch} <: AbstractGrid{FT, T FT = eltype(grid) arch = architecture(grid) Arch = typeof(arch) - return new{FT, TX, TY, TZ, G, I, Nothing, Arch}(arch, grid, ib, nothing) + return new{FT, TX, TY, TZ, G, I, Nothing, Nothing, Arch}(arch, grid, ib, nothing, nothing) end end @@ -132,7 +133,7 @@ const IBG = ImmersedBoundaryGrid @inline get_ibg_property(ibg::IBG, ::Val{:immersed_boundary}) = getfield(ibg, :immersed_boundary) @inline get_ibg_property(ibg::IBG, ::Val{:underlying_grid}) = getfield(ibg, :underlying_grid) @inline get_ibg_property(ibg::IBG, ::Val{:interior_active_cells}) = getfield(ibg, :interior_active_cells) -@inline get_ibg_property(ibg::IBG, ::Val{:surface_active_cells}) = getfield(ibg, :surface_active_cells) +@inline get_ibg_property(ibg::IBG, ::Val{:active_z_columns}) = getfield(ibg, :active_z_columns) @inline architecture(ibg::IBG) = architecture(ibg.underlying_grid) @@ -141,7 +142,10 @@ const IBG = ImmersedBoundaryGrid @inline z_domain(ibg::IBG) = z_domain(ibg.underlying_grid) Adapt.adapt_structure(to, ibg::IBG{FT, TX, TY, TZ}) where {FT, TX, TY, TZ} = - ImmersedBoundaryGrid{TX, TY, TZ}(adapt(to, ibg.underlying_grid), adapt(to, ibg.immersed_boundary), adapt(to, ibg.interior_active_cells)) + ImmersedBoundaryGrid{TX, TY, TZ}(adapt(to, ibg.underlying_grid), + adapt(to, ibg.immersed_boundary), + adapt(to, ibg.interior_active_cells), + adapt(to, ibg.active_z_columns)) with_halo(halo, ibg::ImmersedBoundaryGrid) = ImmersedBoundaryGrid(with_halo(halo, ibg.underlying_grid), ibg.immersed_boundary) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index f3ce915df7..dbb4869368 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -1,26 +1,92 @@ using Oceananigans +using Oceananigans.Utils using Oceananigans.Grids: AbstractGrid using KernelAbstractions: @kernel, @index -import Oceananigans.Utils: active_cells_work_layout, - use_only_active_interior_cells +import Oceananigans.Utils: active_cells_work_layout -const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection +using Oceananigans.DistributedComputations: DistributedGrid -struct InteriorMap end -struct SurfaceMap end +import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! -@inline use_only_active_interior_cells(grid::ActiveCellsIBG) = InteriorMap() +const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} # Cannot be used to dispatch in kernels!!! +const ArrayActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const NamedTupleActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:NamedTuple} -@inline use_only_active_surface_cells(grid::AbstractGrid) = nothing -@inline use_only_active_surface_cells(grid::ActiveCellsIBG) = SurfaceMap() +""" +A constant representing an immersed boundary grid, where interior active cells are mapped to linear indices in grid.interior_active_cells +""" +const ActiveCellsIBG = Union{DistributedActiveCellsIBG, ArrayActiveCellsIBG, NamedTupleActiveCellsIBG} -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) -@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveCellsIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) +""" +A constant representing an immersed boundary grid, where active columns in the Z-direction are mapped to linear indices in grid.active_z_columns +""" +const ActiveZColumnsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} -@inline active_linear_index_to_interior_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) -@inline active_linear_index_to_surface_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.surface_active_cells[idx]) +struct InteriorMap end +struct ZColumnMap end + +struct WestMap end +struct EastMap end +struct SouthMap end +struct NorthMap end + +@inline active_surface_map(::AbstractGrid) = nothing +@inline active_surface_map(::ActiveZColumnsIBG) = ZColumnMap() + +@inline active_interior_map(::Val{:west}) = WestMap() +@inline active_interior_map(::Val{:east}) = EastMap() +@inline active_interior_map(::Val{:south}) = SouthMap() +@inline active_interior_map(::Val{:north}) = NorthMap() + +@inline active_interior_map(::AbstractGrid) = nothing +@inline active_interior_map(::ActiveCellsIBG) = InteriorMap() +@inline active_interior_map(::DistributedActiveCellsIBG) = InteriorMap() + +""" + active_cells_work_layout(group, size, map_type, grid) + +Compute the work layout for active cells based on the given map type and grid. + +# Arguments +- `group`: The previous workgroup. +- `size`: The previous worksize. +- `map_type`: The type of map (e.g., `InteriorMap`, `WestMap`, `EastMap`, `SouthMap`, `NorthMap`). +- `grid`: The grid containing the active cells. + +# Returns +- A tuple `(workgroup, worksize)` representing the work layout for active cells. +""" +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ArrayActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) +@inline active_cells_work_layout(group, size, ::WestMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) +@inline active_cells_work_layout(group, size, ::EastMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) +@inline active_cells_work_layout(group, size, ::SouthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) +@inline active_cells_work_layout(group, size, ::NorthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) +@inline active_cells_work_layout(group, size, ::ZColumnMap, grid::ActiveZColumnsIBG) = min(length(grid.active_z_columns), 256), length(grid.active_z_columns) + +""" + active_linear_index_to_tuple(idx, map, grid) + +Converts a linear index to a tuple of indices based on the given map and grid. + +# Arguments +- `idx`: The linear index to convert. +- `map`: The map indicating the type of index conversion to perform. +- `grid`: The grid containing the active cells. + +# Returns +A tuple of indices corresponding to the linear index. +""" +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ArrayActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) +@inline active_linear_index_to_tuple(idx, ::WestMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.west[idx]) +@inline active_linear_index_to_tuple(idx, ::EastMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.east[idx]) +@inline active_linear_index_to_tuple(idx, ::SouthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) +@inline active_linear_index_to_tuple(idx, ::NorthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) +@inline active_linear_index_to_tuple(idx, ::ZColumnMap, grid::ActiveZColumnsIBG) = Base.map(Int, grid.active_z_columns[idx]) function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) @@ -29,30 +95,38 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) # Create the cells map on the CPU, then switch it to the GPU if active_cells_map - map_interior = active_cells_map_interior(ibg) - map_interior = arch_array(architecture(ibg), map_interior) - # map_surface = active_cells_map_surface(ibg) - # map_surface = arch_array(architecture(ibg), map_surface) + interior_map = map_interior_active_cells(ibg) + column_map = map_active_z_columns(ibg) else - map_interior = nothing + interior_map = nothing + column_map = nothing end return ImmersedBoundaryGrid{TX, TY, TZ}(ibg.underlying_grid, ibg.immersed_boundary, - map_interior) + interior_map, + column_map) end +with_halo(halo, ibg::ActiveCellsIBG) = + ImmersedBoundaryGrid(with_halo(halo, ibg.underlying_grid), ibg.immersed_boundary; active_cells_map = true) + @inline active_cell(i, j, k, ibg) = !immersed_cell(i, j, k, ibg) @inline active_column(i, j, k, grid, column) = column[i, j, k] != 0 -function compute_interior_active_cells(ibg) - is_immersed_operation = KernelFunctionOperation{Center, Center, Center}(active_cell, ibg) +@kernel function _set_active_indices!(active_cells_field, grid) + i, j, k = @index(Global, NTuple) + @inbounds active_cells_field[i, j, k] = active_cell(i, j, k, grid) +end + +function compute_interior_active_cells(ibg; parameters = :xyz) active_cells_field = Field{Center, Center, Center}(ibg, Bool) - set!(active_cells_field, is_immersed_operation) + fill!(active_cells_field, false) + launch!(architecture(ibg), ibg, parameters, _set_active_indices!, active_cells_field, ibg) return active_cells_field end -function compute_surface_active_cells(ibg) +function compute_active_z_columns(ibg) one_field = ConditionalOperation{Center, Center, Center}(OneField(Int), identity, ibg, NotImmersed(truefunc), 0) column = sum(one_field, dims = 3) is_immersed_column = KernelFunctionOperation{Center, Center, Nothing}(active_column, ibg, column) @@ -65,8 +139,20 @@ const MAXUInt8 = 2^8 - 1 const MAXUInt16 = 2^16 - 1 const MAXUInt32 = 2^32 - 1 -function active_cells_map_interior(ibg) - active_cells_field = compute_interior_active_cells(ibg) +""" + interior_active_indices(ibg; parameters = :xyz) + +Compute the indices of the active interior cells in the given immersed boundary grid. + +# Arguments +- `ibg`: The immersed boundary grid. +- `parameters`: (optional) The parameters to be used for computing the active cells. Default is `:xyz`. + +# Returns +An array of tuples representing the indices of the active interior cells. +""" +function interior_active_indices(ibg; parameters = :xyz) + active_cells_field = compute_interior_active_cells(ibg; parameters) N = maximum(size(ibg)) IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 @@ -76,6 +162,7 @@ function active_cells_map_interior(ibg) # Cannot findall on the entire field because we incur on OOM errors active_indices = IndicesType[] active_indices = findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) + active_indices = arch_array(architecture(ibg), active_indices) return active_indices end @@ -103,19 +190,60 @@ end @inline add_3rd_index(t::Tuple, k) = (t[1], t[2], k) +map_interior_active_cells(ibg) = interior_active_indices(ibg; parameters = :xyz) + +# In case of a `DistributedGrid` we want to have different maps depending on the +# partitioning of the domain +function map_interior_active_cells(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) + + arch = architecture(ibg) + Rx, Ry, _ = arch.ranks + Tx, Ty, _ = topology(ibg) + Nx, Ny, Nz = size(ibg) + Hx, Hy, _ = halo_size(ibg) + + x_boundary = (Hx, Ny, Nz) + y_boundary = (Nx, Hy, Nz) + + left_offsets = (0, 0, 0) + right_x_offsets = (Nx-Hx, 0, 0) + right_y_offsets = (0, Ny-Hy, 0) + + include_west = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == RightConnected) + include_east = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == LeftConnected) + include_south = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == RightConnected) + include_north = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == LeftConnected) + + west = include_west ? interior_active_indices(ibg; parameters = KernelParameters(x_boundary, left_offsets)) : nothing + east = include_east ? interior_active_indices(ibg; parameters = KernelParameters(x_boundary, right_x_offsets)) : nothing + south = include_south ? interior_active_indices(ibg; parameters = KernelParameters(y_boundary, left_offsets)) : nothing + north = include_north ? interior_active_indices(ibg; parameters = KernelParameters(y_boundary, right_y_offsets)) : nothing + + nx = Rx == 1 ? Nx : (Tx == RightConnected || Tx == LeftConnected ? Nx - Hx : Nx - 2Hx) + ny = Ry == 1 ? Ny : (Ty == RightConnected || Ty == LeftConnected ? Ny - Hy : Ny - 2Hy) + + ox = Rx == 1 || Tx == RightConnected ? 0 : Hx + oy = Ry == 1 || Ty == RightConnected ? 0 : Hy + + interior = interior_active_indices(ibg; parameters = KernelParameters((nx, ny, Nz), (ox, oy, 0))) + + return (; interior, west, east, south, north) +end + # If we eventually want to perform also barotropic step, `w` computation and `p` # computation only on active `columns` -function active_cells_map_surface(ibg) - active_cells_field = compute_surface_active_cells(ibg) +function map_active_z_columns(ibg) + active_cells_field = compute_active_z_columns(ibg) interior_cells = arch_array(CPU(), interior(active_cells_field, :, :, 1)) full_indices = findall(interior_cells) - Nx, Ny, Nz = size(ibg) + Nx, Ny, _ = size(ibg) # Reduce the size of the active_cells_map (originally a tuple of Int64) N = max(Nx, Ny) IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 - smaller_indices = getproperty.(full_indices, Ref(:I)) .|> Tuple{IntType, IntType} - - return smaller_indices + surface_map = getproperty.(full_indices, Ref(:I)) .|> Tuple{IntType, IntType} + surface_map = arch_array(architecture(ibg), surface_map) + + return surface_map end diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl index 8bd1d7de48..e693e20e2d 100644 --- a/src/ImmersedBoundaries/grid_fitted_bottom.jl +++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl @@ -102,6 +102,6 @@ function on_architecture(arch, ib::GridFittedBottom{<:Field}) return GridFittedBottom(new_bottom_height, ib.immersed_condition) end -Adapt.adapt_structure(to, ib::GridFittedBottom) = GridFittedBottom(adapt(to, ib.bottom_height.data), +Adapt.adapt_structure(to, ib::GridFittedBottom) = GridFittedBottom(adapt(to, ib.bottom_height), ib.immersed_condition) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl index ab510f7c76..9e2715dcff 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl @@ -8,6 +8,8 @@ using Oceananigans.Models.NonhydrostaticModels: boundary_tendency_kernel_paramet import Oceananigans.Models: compute_boundary_tendencies! +using Oceananigans.ImmersedBoundaries: active_interior_map, DistributedActiveCellsIBG + # We assume here that top/bottom BC are always synched (no partitioning in z) function compute_boundary_tendencies!(model::HydrostaticFreeSurfaceModel) grid = model.grid @@ -21,8 +23,30 @@ function compute_boundary_tendencies!(model::HydrostaticFreeSurfaceModel) compute_auxiliaries!(model; w_parameters, p_parameters, κ_parameters) # parameters for communicating North / South / East / West side + compute_boundary_tendency_contributions!(grid, arch, model) + + return nothing +end + +function compute_boundary_tendency_contributions!(grid, arch, model) kernel_parameters = boundary_tendency_kernel_parameters(grid, arch) compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters) + return nothing +end + +function compute_boundary_tendency_contributions!(grid::DistributedActiveCellsIBG, arch, model) + maps = grid.interior_active_cells + + for (name, map) in zip(keys(maps), maps) + compute_boundary = (name != :interior) && !isnothing(map) + + # If there exists a boundary map, then we compute the boundary contributions + if compute_boundary + active_boundary_map = active_interior_map(Val(name)) + compute_hydrostatic_free_surface_tendency_contributions!(model, tuple(:xyz); + active_cells_map = active_boundary_map) + end + end return nothing end @@ -37,8 +61,8 @@ function boundary_w_kernel_parameters(grid, arch) # Offsets in tangential direction are == -1 to # cover the required corners - Oxᴸ = (-Hx, -1) - Oyᴸ = (-1, -Hy) + Oxᴸ = (-Hx+1, -1) + Oyᴸ = (-1, -Hy+1) Oxᴿ = (Nx-1, -1) Oyᴿ = (-1, Ny-1) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 447d0efdf3..dc330fa1fd 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -13,8 +13,8 @@ import Oceananigans: tracer_tendency_kernel_function import Oceananigans.Models: complete_communication_and_compute_boundary! import Oceananigans.Models: interior_tendency_kernel_parameters -using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells, ActiveCellsIBG, - InteriorMap, active_linear_index_to_interior_tuple +using Oceananigans.ImmersedBoundaries: active_interior_map, ActiveCellsIBG, + InteriorMap, active_linear_index_to_tuple """ compute_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks) @@ -29,7 +29,7 @@ function compute_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks) # Calculate contributions to momentum and tracer tendencies from fluxes and volume terms in the # interior of the domain compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters; - only_active_cells = use_only_active_interior_cells(model.grid)) + active_cells_map = active_interior_map(model.grid)) complete_communication_and_compute_boundary!(model, model.grid, model.architecture) @@ -80,12 +80,12 @@ end end """ Store previous value of the source term and compute current source term. """ -function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters; only_active_cells = nothing) +function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters; active_cells_map = nothing) arch = model.architecture grid = model.grid - compute_hydrostatic_momentum_tendencies!(model, model.velocities, kernel_parameters; only_active_cells) + compute_hydrostatic_momentum_tendencies!(model, model.velocities, kernel_parameters; active_cells_map) top_tracer_bcs = top_tracer_boundary_conditions(grid, model.tracers) @@ -121,9 +121,9 @@ function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_ tendency_kernel!, c_tendency, grid, - only_active_cells, + active_cells_map, args; - only_active_cells) + active_cells_map) end end @@ -160,7 +160,7 @@ function compute_free_surface_tendency!(grid, model, kernel_parameters) end """ Calculate momentum tendencies if momentum is not prescribed.""" -function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_parameters; only_active_cells = nothing) +function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_parameters; active_cells_map = nothing) grid = model.grid arch = architecture(grid) @@ -184,17 +184,17 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para u_kernel_args = tuple(start_momentum_kernel_args..., u_immersed_bc, end_momentum_kernel_args...) v_kernel_args = tuple(start_momentum_kernel_args..., v_immersed_bc, end_momentum_kernel_args...) - + for parameters in kernel_parameters launch!(arch, grid, parameters, compute_hydrostatic_free_surface_Gu!, model.timestepper.Gⁿ.u, grid, - only_active_cells, u_kernel_args; - only_active_cells) + active_cells_map, u_kernel_args; + active_cells_map) launch!(arch, grid, parameters, compute_hydrostatic_free_surface_Gv!, model.timestepper.Gⁿ.v, grid, - only_active_cells, v_kernel_args; - only_active_cells) + active_cells_map, v_kernel_args; + active_cells_map) end compute_free_surface_tendency!(grid, model, :xy) @@ -228,26 +228,26 @@ end ##### """ Calculate the right-hand-side of the u-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end """ Calculate the right-hand-side of the v-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end @@ -256,26 +256,26 @@ end ##### """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end """ Calculate the right-hand-side of the subgrid scale energy equation. """ -@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) end diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index 679889a888..4f14f69f2a 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -12,14 +12,11 @@ function SplitExplicitAuxiliaryFields(grid::DistributedGrid) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) - + calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) - calculate_column_height!(Hᶜᶜ, (Center, Center, Center)) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) # In a non-parallel grid we calculate only the interior kernel_size = augmented_kernel_size(grid) @@ -27,7 +24,7 @@ function SplitExplicitAuxiliaryFields(grid::DistributedGrid) kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) end """Integrate z at locations `location` and set! `height`` with the result""" @@ -69,14 +66,15 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid::D old_halos = halo_size(grid) Nsubsteps = length(settings.substepping.averaging_weights) - new_halos = distributed_split_explicit_halos(old_halos, Nsubsteps+1, grid) - new_grid = with_halo(new_halos, grid) + extended_halos = distributed_split_explicit_halos(old_halos, Nsubsteps+1, grid) + extended_grid = with_halo(extended_halos, grid) - η = ZFaceField(new_grid, indices = (:, :, size(new_grid, 3)+1)) + Nze = size(extended_grid, 3) + η = ZFaceField(extended_grid, indices = (:, :, Nze+1)) return SplitExplicitFreeSurface(η, - SplitExplicitState(new_grid), - SplitExplicitAuxiliaryFields(new_grid), + SplitExplicitState(extended_grid, settings.timestepper), + SplitExplicitAuxiliaryFields(extended_grid), free_surface.gravitational_acceleration, free_surface.settings) end diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index 509b1fe0ea..717bb1fd2d 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -2,6 +2,8 @@ using Oceananigans.Fields: location using Oceananigans.TimeSteppers: ab2_step_field! using Oceananigans.TurbulenceClosures: implicit_step! +using Oceananigans.ImmersedBoundaries: active_interior_map, active_surface_map + import Oceananigans.TimeSteppers: ab2_step! ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 0604cd5de5..d32b5786d8 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -83,7 +83,7 @@ SplitExplicitFreeSurface(FT::DataType = Float64; gravitational_acceleration = g_ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid) η = FreeSurfaceDisplacementField(velocities, free_surface, grid) - return SplitExplicitFreeSurface(η, SplitExplicitState(grid), + return SplitExplicitFreeSurface(η, SplitExplicitState(grid, free_surface.settings.timestepper), SplitExplicitAuxiliaryFields(grid), free_surface.gravitational_acceleration, free_surface.settings) @@ -92,15 +92,12 @@ end function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, settings = SplitExplicitSettings(eltype(grid); gravitational_acceleration, substeps = 200)) - if eltype(settings) != eltype(grid) - @warn "Using $(eltype(settings)) settings for the SplitExplicitFreeSurface on a $(eltype(grid)) grid" - end - - η = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + Nz = size(grid, 3) + η = ZFaceField(grid, indices = (:, :, Nz+1)) gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) - return SplitExplicitFreeSurface(η, SplitExplicitState(grid), SplitExplicitAuxiliaryFields(grid), - gravitational_acceleration, settings) + return SplitExplicitFreeSurface(η, SplitExplicitState(grid, settings.timestepper), SplitExplicitAuxiliaryFields(grid), + gravitational_acceleration, settings) end """ @@ -110,35 +107,35 @@ A type containing the state fields for the split-explicit free surface. $(FIELDS) """ -Base.@kwdef struct SplitExplicitState{𝒞𝒞, ℱ𝒞, 𝒞ℱ} +Base.@kwdef struct SplitExplicitState{CC, ACC, FC, AFC, CF, ACF} "The free surface at time `m`. (`ReducedField` over ``z``)" - ηᵐ :: 𝒞𝒞 + ηᵐ :: ACC "The free surface at time `m-1`. (`ReducedField` over ``z``)" - ηᵐ⁻¹ :: 𝒞𝒞 + ηᵐ⁻¹ :: ACC "The free surface at time `m-2`. (`ReducedField` over ``z``)" - ηᵐ⁻² :: 𝒞𝒞 + ηᵐ⁻² :: ACC "The barotropic zonal velocity at time `m`. (`ReducedField` over ``z``)" - U :: ℱ𝒞 + U :: FC "The barotropic zonal velocity at time `m-1`. (`ReducedField` over ``z``)" - Uᵐ⁻¹ :: ℱ𝒞 + Uᵐ⁻¹ :: AFC "The barotropic zonal velocity at time `m-2`. (`ReducedField` over ``z``)" - Uᵐ⁻² :: ℱ𝒞 + Uᵐ⁻² :: AFC "The barotropic meridional velocity at time `m`. (`ReducedField` over ``z``)" - V :: 𝒞ℱ + V :: CF "The barotropic meridional velocity at time `m-1`. (`ReducedField` over ``z``)" - Vᵐ⁻¹ :: 𝒞ℱ + Vᵐ⁻¹ :: ACF "The barotropic meridional velocity at time `m-2`. (`ReducedField` over ``z``)" - Vᵐ⁻² :: 𝒞ℱ + Vᵐ⁻² :: ACF "The time-filtered free surface. (`ReducedField` over ``z``)" - η̅ :: 𝒞𝒞 + η̅ :: CC "The time-filtered barotropic zonal velocity. (`ReducedField` over ``z``)" - U̅ :: ℱ𝒞 + U̅ :: FC "The time-filtered barotropic meridional velocity. (`ReducedField` over ``z``)" - V̅ :: 𝒞ℱ + V̅ :: CF end """ - SplitExplicitState(grid) + SplitExplicitState(grid, timestepper) Return the split-explicit state for `grid`. @@ -146,24 +143,26 @@ Note that `η̅` is solely used for setting the `η` at the next substep iterati acts as a filter for `η`. Values with superscripts `m-1` and `m-2` correspond to previous stored time steps to allow using a higher-order time stepping scheme, e.g., `AdamsBashforth3Scheme`. """ -function SplitExplicitState(grid::AbstractGrid) - η̅ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) +function SplitExplicitState(grid::AbstractGrid, timestepper) + + Nz = size(grid, 3) + + η̅ = ZFaceField(grid, indices = (:, :, Nz+1)) - ηᵐ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - ηᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - ηᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + ηᵐ = auxiliary_free_surface_field(grid, timestepper) + ηᵐ⁻¹ = auxiliary_free_surface_field(grid, timestepper) + ηᵐ⁻² = auxiliary_free_surface_field(grid, timestepper) - U = Field((Face, Center, Nothing), grid) - V = Field((Center, Face, Nothing), grid) + U = XFaceField(grid, indices = (:, :, Nz)) + V = YFaceField(grid, indices = (:, :, Nz)) - Uᵐ⁻¹ = Field((Face, Center, Nothing), grid) - Vᵐ⁻¹ = Field((Center, Face, Nothing), grid) - - Uᵐ⁻² = Field((Face, Center, Nothing), grid) - Vᵐ⁻² = Field((Center, Face, Nothing), grid) + Uᵐ⁻¹ = auxiliary_barotropic_U_field(grid, timestepper) + Vᵐ⁻¹ = auxiliary_barotropic_V_field(grid, timestepper) + Uᵐ⁻² = auxiliary_barotropic_U_field(grid, timestepper) + Vᵐ⁻² = auxiliary_barotropic_V_field(grid, timestepper) - U̅ = Field((Face, Center, Nothing), grid) - V̅ = Field((Center, Face, Nothing), grid) + U̅ = XFaceField(grid, indices = (:, :, Nz)) + V̅ = YFaceField(grid, indices = (:, :, Nz)) return SplitExplicitState(; ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅) end @@ -179,7 +178,7 @@ large (or `:xy` in case of a serial computation), and start computing from $(FIELDS) """ -Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒞𝒞, 𝒦} +Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒦} "Vertically-integrated slow barotropic forcing function for `U` (`ReducedField` over ``z``)" Gᵁ :: ℱ𝒞 "Vertically-integrated slow barotropic forcing function for `V` (`ReducedField` over ``z``)" @@ -188,8 +187,6 @@ Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒞𝒞, 𝒦 Hᶠᶜ :: ℱ𝒞 "Depth at `(Center, Face)` (`ReducedField` over ``z``)" Hᶜᶠ :: 𝒞ℱ - "Depth at `(Center, Center)` (`ReducedField` over ``z``)" - Hᶜᶜ :: 𝒞𝒞 "kernel size for barotropic time stepping" kernel_parameters :: 𝒦 end @@ -206,7 +203,6 @@ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) dz = GridMetricOperation((Face, Center, Center), Δz, grid) sum!(Hᶠᶜ, dz) @@ -214,14 +210,11 @@ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) dz = GridMetricOperation((Center, Face, Center), Δz, grid) sum!(Hᶜᶠ, dz) - dz = GridMetricOperation((Center, Center, Center), Δz, grid) - sum!(Hᶜᶜ, dz) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) kernel_parameters = :xy - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) end """ @@ -239,6 +232,15 @@ end struct AdamsBashforth3Scheme end struct ForwardBackwardScheme end + +auxiliary_free_surface_field(grid, ::AdamsBashforth3Scheme) = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) +auxiliary_free_surface_field(grid, ::ForwardBackwardScheme) = nothing + +auxiliary_barotropic_U_field(grid, ::AdamsBashforth3Scheme) = XFaceField(grid, indices = (:, :, size(grid, 3))) +auxiliary_barotropic_U_field(grid, ::ForwardBackwardScheme) = nothing +auxiliary_barotropic_V_field(grid, ::AdamsBashforth3Scheme) = YFaceField(grid, indices = (:, :, size(grid, 3))) +auxiliary_barotropic_V_field(grid, ::ForwardBackwardScheme) = nothing + # (p = 2, q = 4, r = 0.18927) minimize dispersion error from Shchepetkin and McWilliams (2005): https://doi.org/10.1016/j.ocemod.2004.08.002 @inline function averaging_shape_function(τ::FT; p = 2, q = 4, r = FT(0.18927)) where FT τ₀ = (p + 2) * (p + q + 2) / (p + 1) / (p + q + 1) @@ -291,7 +293,7 @@ end averaging_weights = averaging_weights[1:idx] averaging_weights ./= sum(averaging_weights) - return Δτ, averaging_weights + return Δτ, tuple(averaging_weights...) end function SplitExplicitSettings(FT::DataType=Float64; diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index b9389a0440..d9a1e67193 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -4,15 +4,19 @@ using Oceananigans.Utils using Oceananigans.AbstractOperations: Δz using Oceananigans.BoundaryConditions using Oceananigans.Operators -using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node -using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f -using Oceananigans.ImmersedBoundaries: mask_immersed_field! +using Oceananigans.Architectures: convert_args +using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node, GFBIBG +using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, ZColumnMap +using Oceananigans.ImmersedBoundaries: mask_immersed_field!, active_surface_map, active_interior_map +using Oceananigans.ImmersedBoundaries: active_linear_index_to_tuple, ActiveCellsIBG, ActiveZColumnsIBG +using Oceananigans.DistributedComputations: child_architecture +using Oceananigans.DistributedComputations: Distributed +using Printf # constants for AB3 time stepping scheme (from https://doi.org/10.1016/j.ocemod.2004.08.002) const β = 0.281105 const α = 1.5 + β const θ = - 0.5 - 2β - const γ = 0.088 const δ = 0.614 const ϵ = 0.013 @@ -99,9 +103,9 @@ end # Time stepping extrapolation U★, and η★ # AB3 step -@inline function U★(i, j, k, grid, ::AdamsBashforth3Scheme, ϕᵐ, ϕᵐ⁻¹, ϕᵐ⁻²) +@inline function U★(i, j, k, grid, ::AdamsBashforth3Scheme, Uᵐ, Uᵐ⁻¹, Uᵐ⁻²) FT = eltype(grid) - return @inbounds FT(α) * ϕᵐ[i, j, k] + FT(θ) * ϕᵐ⁻¹[i, j, k] + FT(β) * ϕᵐ⁻²[i, j, k] + return @inbounds FT(α) * Uᵐ[i, j, k] + FT(θ) * Uᵐ⁻¹[i, j, k] + FT(β) * Uᵐ⁻²[i, j, k] end @inline function η★(i, j, k, grid, ::AdamsBashforth3Scheme, ηᵐ⁺¹, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) @@ -110,7 +114,7 @@ end end # Forward Backward Step -@inline U★(i, j, k, grid, ::ForwardBackwardScheme, ϕ, args...) = @inbounds ϕ[i, j, k] +@inline U★(i, j, k, grid, ::ForwardBackwardScheme, U, args...) = @inbounds U[i, j, k] @inline η★(i, j, k, grid, ::ForwardBackwardScheme, η, args...) = @inbounds η[i, j, k] @inline advance_previous_velocity!(i, j, k, ::ForwardBackwardScheme, U, Uᵐ⁻¹, Uᵐ⁻²) = nothing @@ -132,16 +136,13 @@ end return nothing end -using Oceananigans.DistributedComputations: Distributed -using Printf - -@kernel function split_explicit_free_surface_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) +@kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) i, j = @index(Global, NTuple) - k_top = grid.Nz+1 + free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +end +@inline function free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + k_top = grid.Nz+1 TX, TY, _ = topology(grid) @inbounds begin @@ -150,81 +151,100 @@ using Printf η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²)) end + + return nothing end -@kernel function split_explicit_barotropic_velocity_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) +@kernel function _split_explicit_barotropic_velocity!(averaging_weight, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, + timestepper) i, j = @index(Global, NTuple) - k_top = grid.Nz + 1 + velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, + timestepper) +end +@inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, + timestepper) + k_top = grid.Nz+1 + TX, TY, _ = topology(grid) @inbounds begin - advance_previous_velocity!(i, j, 1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) - advance_previous_velocity!(i, j, 1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, 1] += Δτ * (- g * Hᶠᶜ[i, j] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, 1]) - V[i, j, 1] += Δτ * (- g * Hᶜᶠ[i, j] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, 1]) - + U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, 1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, 1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) + # time-averaging - η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] - U̅[i, j, 1] += averaging_weight * U[i, j, 1] - V̅[i, j, 1] += averaging_weight * V[i, j, 1] + η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] + U̅[i, j, k_top-1] += averaging_weight * U[i, j, k_top-1] + V̅[i, j, k_top-1] += averaging_weight * V[i, j, k_top-1] end end -function split_explicit_free_surface_substep!(η, state, auxiliary, settings, weights, arch, grid, g, Δτ, substep_index) - # unpack state quantities, parameters and forcing terms - U, V = state.U, state.V - Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² - Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² - ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² - η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ - - timestepper = settings.timestepper - averaging_weight = weights[substep_index] - - parameters = auxiliary.kernel_parameters - - args = (grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) +# Barotropic Model Kernels +# u_Δz = u * Δz +@kernel function _barotropic_mode_kernel!(U, V, grid, u, v) + i, j = @index(Global, NTuple) + k_top = grid.Nz+1 - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...) - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...) + # hand unroll first loop + @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] + @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] - return nothing + @unroll for k in 2:grid.Nz + @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] + @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] + end end # Barotropic Model Kernels # u_Δz = u * Δz -# Possibly we can optimize this further by passing in Val(Nz) in order to -# @unroll the loop over `k`. -@kernel function _barotropic_mode_kernel!(U, V, grid, u, v) - i, j = @index(Global, NTuple) +@kernel function _barotropic_mode_kernel!(U, V, grid::ActiveZColumnsIBG, u, v) + idx = @index(Global, Linear) + i, j = active_linear_index_to_tuple(idx, ZColumnMap(), grid) + k_top = grid.Nz+1 - @inbounds U[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] - @inbounds V[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] + # hand unroll first loop + @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] + @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] - for k in 2:grid.Nz - @inbounds U[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] - @inbounds V[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] + @unroll for k in 2:grid.Nz + @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] + @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] end end compute_barotropic_mode!(U, V, grid, u, v) = - launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v) + launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; + active_cells_map = active_surface_map(grid)) -function initialize_free_surface_state!(free_surface_state, η) - state = free_surface_state +function initialize_free_surface_state!(state, η, timestepper) parent(state.U) .= parent(state.U̅) parent(state.V) .= parent(state.V̅) + initialize_auxiliary_state!(state, η, timestepper) + + fill!(state.η̅, 0) + fill!(state.U̅, 0) + fill!(state.V̅, 0) + + return nothing +end + +initialize_auxiliary_state!(state, η, ::ForwardBackwardScheme) = nothing + +function initialize_auxiliary_state!(state, η, timestepper) parent(state.Uᵐ⁻¹) .= parent(state.U̅) parent(state.Vᵐ⁻¹) .= parent(state.V̅) @@ -235,35 +255,32 @@ function initialize_free_surface_state!(free_surface_state, η) parent(state.ηᵐ⁻¹) .= parent(η) parent(state.ηᵐ⁻²) .= parent(η) - fill!(state.η̅, 0) - fill!(state.U̅, 0) - fill!(state.V̅, 0) - return nothing end -@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ) +@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) i, j, k = @index(Global, NTuple) + k_top = grid.Nz+1 @inbounds begin - u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] - v[i, j, k] = v[i, j, k] + (V̅[i, j] - V[i, j]) / Hᶜᶠ[i, j] + u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / Hᶠᶜ[i, j, 1] + v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / Hᶜᶠ[i, j, 1] end end function barotropic_split_explicit_corrector!(u, v, free_surface, grid) sefs = free_surface.state - Hᶠᶜ, Hᶜᶠ = free_surface.auxiliary.Hᶠᶜ, free_surface.auxiliary.Hᶜᶠ U, V, U̅, V̅ = sefs.U, sefs.V, sefs.U̅, sefs.V̅ + Hᶠᶜ, Hᶜᶠ = free_surface.auxiliary.Hᶠᶜ, free_surface.auxiliary.Hᶜᶠ arch = architecture(grid) + # take out "bad" barotropic mode, # !!!! reusing U and V for this storage since last timestep doesn't matter compute_barotropic_mode!(U, V, grid, u, v) # add in "good" barotropic mode - - launch!(arch, grid, :xyz, barotropic_split_explicit_corrector_kernel!, - u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ) + launch!(arch, grid, :xyz, _barotropic_split_explicit_corrector!, + u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) return nothing end @@ -288,11 +305,24 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac # Wait for previous set up wait_free_surface_communication!(free_surface, architecture(free_surface_grid)) + # Calculate the substepping parameterers + settings = free_surface.settings + Nsubsteps = calculate_substeps(settings.substepping, Δt) + + # barotropic time step as fraction of baroclinic step and averaging weights + fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) + Nsubsteps = length(weights) + + # barotropic time step in seconds + Δτᴮ = fractional_Δt * Δt + # reset free surface averages @apply_regionally begin - initialize_free_surface_state!(free_surface.state, free_surface.η) + initialize_free_surface_state!(free_surface.state, free_surface.η, settings.timestepper) + # Solve for the free surface at tⁿ⁺¹ - iterate_split_explicit!(free_surface, free_surface_grid, Δt) + iterate_split_explicit!(free_surface, free_surface_grid, Δτᴮ, weights, Val(Nsubsteps)) + # Reset eta for the next timestep set!(free_surface.η, free_surface.state.η̅) end @@ -324,7 +354,10 @@ const MINIMUM_SUBSTEPS = 5 @inline calculate_adaptive_settings(substepping::FTS, substeps) = weights_from_substeps(eltype(substepping.Δt_barotropic), substeps, substepping.averaging_kernel) -function iterate_split_explicit!(free_surface, grid, Δt) +const FixedSubstepsSetting{N} = SplitExplicitSettings{<:FixedSubstepNumber{<:Any, <:NTuple{N, <:Any}}} where N +const FixedSubstepsSplitExplicit{F} = SplitExplicitFreeSurface{<:Any, <:Any, <:Any, <:Any, <:FixedSubstepsSetting{N}} where N + +function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsubsteps}) where Nsubsteps arch = architecture(grid) η = free_surface.η @@ -332,16 +365,46 @@ function iterate_split_explicit!(free_surface, grid, Δt) auxiliary = free_surface.auxiliary settings = free_surface.settings g = free_surface.gravitational_acceleration - - Nsubsteps = calculate_substeps(settings.substepping, Δt) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - Nsubsteps = length(weights) - - Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds - - for substep in 1:Nsubsteps - split_explicit_free_surface_substep!(η, state, auxiliary, settings, weights, arch, grid, g, Δτᴮ, substep) + + # unpack state quantities, parameters and forcing terms + U, V = state.U, state.V + Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² + Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² + ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² + η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ + + timestepper = settings.timestepper + + parameters = auxiliary.kernel_parameters + + free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) + + η_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper) + + U_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, + timestepper) + + GC.@preserve η_args U_args begin + + # We need to perform ~50 time-steps which means + # launching ~100 very small kernels: we are limited by + # latency of argument conversion to GPU-compatible values. + # To alleviate this penalty we convert first and then we substep! + converted_η_args = convert_args(arch, η_args) + converted_U_args = convert_args(arch, U_args) + + @unroll for substep in 1:Nsubsteps + Base.@_inline_meta + averaging_weight = weights[substep] + free_surface_kernel!(converted_η_args...) + barotropic_velocity_kernel!(averaging_weight, converted_U_args...) + end end return nothing @@ -351,14 +414,32 @@ end # Possibly we can optimize this further by passing in Val(Nz) in order to # @unroll the loop over `k`. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) - i, j = @index(Global, NTuple) + i, j = @index(Global, NTuple) + k_top = grid.Nz+1 - @inbounds Gᵁ[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + # hand unroll first loop + @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) - for k in 2:grid.Nz - @inbounds Gᵁ[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) + @unroll for k in 2:grid.Nz + @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) + end +end + +# Calculate RHS for the barotopic time step. +@kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid::ActiveZColumnsIBG, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) + idx = @index(Global, Linear) + i, j = active_linear_index_to_tuple(idx, ZColumnMap(), grid) + k_top = grid.Nz+1 + + # hand unroll first loop + @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + + @unroll for k in 2:grid.Nz + @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) end end @@ -371,9 +452,7 @@ end # Setting up the RHS for the barotropic step (tendencies of the barotopic velocity components) # This function is called after `calculate_tendency` and before `ab2_step_velocities!` function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) - - free_surface_grid = free_surface.η.grid - + # we start the time integration of η from the average ηⁿ Gu⁻ = model.timestepper.G⁻.u Gv⁻ = model.timestepper.G⁻.v @@ -382,7 +461,7 @@ function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) auxiliary = free_surface.auxiliary - @apply_regionally setup_split_explicit_tendency!(auxiliary, free_surface_grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) + @apply_regionally setup_split_explicit_tendency!(auxiliary, model.grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) fields_to_fill = (auxiliary.Gᵁ, auxiliary.Gⱽ) fill_halo_regions!(fields_to_fill; async = true) @@ -391,6 +470,8 @@ function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) end setup_split_explicit_tendency!(auxiliary, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) = - launch!(architecture(grid), grid, :xy, _compute_integrated_ab2_tendencies!, auxiliary.Gᵁ, auxiliary.Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) - + launch!(architecture(grid), grid, :xy, _compute_integrated_ab2_tendencies!, auxiliary.Gᵁ, auxiliary.Gⱽ, grid, + Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ; active_cells_map = active_surface_map(grid)) + wait_free_surface_communication!(free_surface, arch) = nothing + diff --git a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl index 2473b6ff7d..fa5e391a42 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl @@ -4,6 +4,7 @@ using Oceananigans.TimeSteppers: store_field_tendencies! using Oceananigans: prognostic_fields using Oceananigans.Grids: AbstractGrid +using Oceananigans.ImmersedBoundaries: active_interior_map using Oceananigans.Utils: launch! @@ -36,7 +37,6 @@ function store_tendencies!(model::HydrostaticFreeSurfaceModel) launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], - model.grid, model.timestepper.Gⁿ[field_name]) end diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 4477505945..4cf7c19521 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -36,7 +36,6 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp @apply_regionally update_model_field_time_series!(model, model.clock) fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) - @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl index 4284e70f22..70bc3b922a 100644 --- a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl +++ b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl @@ -3,8 +3,8 @@ using Oceananigans: fields, TendencyCallsite using Oceananigans.Utils: work_layout using Oceananigans.Models: complete_communication_and_compute_boundary!, interior_tendency_kernel_parameters -using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells, ActiveCellsIBG, - InteriorMap, active_linear_index_to_interior_tuple +using Oceananigans.ImmersedBoundaries: active_interior_map, ActiveCellsIBG, + InteriorMap, active_linear_index_to_tuple import Oceananigans.TimeSteppers: compute_tendencies! @@ -28,7 +28,7 @@ function compute_tendencies!(model::NonhydrostaticModel, callbacks) # interior of the domain kernel_parameters = tuple(interior_tendency_kernel_parameters(model.grid)) - compute_interior_tendency_contributions!(model, kernel_parameters; only_active_cells = use_only_active_interior_cells(model.grid)) + compute_interior_tendency_contributions!(model, kernel_parameters; active_cells_map = active_interior_map(model.grid)) complete_communication_and_compute_boundary!(model, model.grid, model.architecture) # Calculate contributions to momentum and tracer tendencies from user-prescribed fluxes across the @@ -50,7 +50,7 @@ function compute_tendencies!(model::NonhydrostaticModel, callbacks) end """ Store previous value of the source term and compute current source term. """ -function compute_interior_tendency_contributions!(model, kernel_parameters; only_active_cells = nothing) +function compute_interior_tendency_contributions!(model, kernel_parameters; active_cells_map = nothing) tendencies = model.timestepper.Gⁿ arch = model.architecture @@ -91,16 +91,16 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; only for parameters in kernel_parameters launch!(arch, grid, parameters, compute_Gu!, - tendencies.u, grid, only_active_cells, u_kernel_args; - only_active_cells) + tendencies.u, grid, active_cells_map, u_kernel_args; + active_cells_map) launch!(arch, grid, parameters, compute_Gv!, - tendencies.v, grid, only_active_cells, v_kernel_args; - only_active_cells) + tendencies.v, grid, active_cells_map, v_kernel_args; + active_cells_map) launch!(arch, grid, parameters, compute_Gw!, - tendencies.w, grid, only_active_cells, w_kernel_args; - only_active_cells) + tendencies.w, grid, active_cells_map, w_kernel_args; + active_cells_map) end start_tracer_kernel_args = (advection, closure) @@ -120,8 +120,8 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; only for parameters in kernel_parameters launch!(arch, grid, parameters, compute_Gc!, - c_tendency, grid, only_active_cells, args; - only_active_cells) + c_tendency, grid, active_cells_map, args; + active_cells_map) end end @@ -138,9 +138,9 @@ end @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gu!(Gu, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gu!(Gu, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) end @@ -150,9 +150,9 @@ end @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gv!(Gv, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gv!(Gv, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) end @@ -162,9 +162,9 @@ end @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gw!(Gw, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gw!(Gw, grid::ActiveCellsIBG, map, ::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) end @@ -178,9 +178,9 @@ end @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_Gc!(Gc, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gc!(Gc, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) end diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index 7349324fec..d33f6bd1a6 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -11,14 +11,11 @@ function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) @apply_regionally calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) @apply_regionally calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) - @apply_regionally calculate_column_height!(Hᶜᶜ, (Center, Center, Center)) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) # In a non-parallel grid we calculate only the interior @apply_regionally kernel_size = augmented_kernel_size(grid, grid.partition) @@ -26,7 +23,7 @@ function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) @apply_regionally kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) end @inline function calculate_column_height!(height, location) @@ -55,7 +52,7 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid::M η = ZFaceField(new_grid, indices = (:, :, size(new_grid, 3)+1)) return SplitExplicitFreeSurface(η, - SplitExplicitState(new_grid), + SplitExplicitState(new_grid, free_surface.settings.timestepper), SplitExplicitAuxiliaryFields(new_grid), free_surface.gravitational_acceleration, free_surface.settings) diff --git a/src/Solvers/batched_tridiagonal_solver.jl b/src/Solvers/batched_tridiagonal_solver.jl index 8cd68a6631..ec5872a846 100644 --- a/src/Solvers/batched_tridiagonal_solver.jl +++ b/src/Solvers/batched_tridiagonal_solver.jl @@ -129,7 +129,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::XDirection) Nx = size(grid, 1) j, k = @index(Global, NTuple) + solve_batched_tridiagonal_system_x!(j, k, Nx, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_x!(j, k, Nx, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(1, j, k, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(1, j, k, grid, f, p, tridiagonal_direction, args...) @@ -161,7 +164,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::YDirection) Ny = size(grid, 2) i, k = @index(Global, NTuple) + solve_batched_tridiagonal_system_y!(i, k, Ny, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_y!(i, k, Ny, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(i, 1, k, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(i, 1, k, grid, f, p, tridiagonal_direction, args...) @@ -193,7 +199,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::ZDirection) Nz = size(grid, 3) i, j = @index(Global, NTuple) + solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(i, j, 1, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(i, j, 1, grid, f, p, tridiagonal_direction, args...) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index 19f5f1beb3..780df779b6 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -1,6 +1,7 @@ using Oceananigans.Fields: FunctionField, location using Oceananigans.TurbulenceClosures: implicit_step! using Oceananigans.Utils: @apply_regionally, apply_regionally! +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, active_linear_index_to_tuple mutable struct QuasiAdamsBashforth2TimeStepper{FT, GT, IT} <: AbstractTimeStepper χ :: FT diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index 06d179bd3a..e2f1b07c7e 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -1,10 +1,10 @@ using Oceananigans: prognostic_fields using Oceananigans.Grids: AbstractGrid - +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG using Oceananigans.Utils: launch! """ Store source terms for `u`, `v`, and `w`. """ -@kernel function store_field_tendencies!(G⁻, grid, G⁰) +@kernel function store_field_tendencies!(G⁻, G⁰) i, j, k = @index(Global, NTuple) @inbounds G⁻[i, j, k] = G⁰[i, j, k] end @@ -16,7 +16,6 @@ function store_tendencies!(model) for field_name in keys(model_fields) launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], - model.grid, model.timestepper.Gⁿ[field_name]) end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl index b65de06ff4..db17a26370 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl @@ -3,7 +3,6 @@ using Oceananigans.BuoyancyModels: ∂z_b using Oceananigans.Operators using Oceananigans.Grids: inactive_node using Oceananigans.Operators: ℑzᵃᵃᶜ -using Oceananigans.Utils: use_only_active_interior_cells struct RiBasedVerticalDiffusivity{TD, FT, R} <: AbstractScalarDiffusivity{TD, VerticalFormulation, 1} ν₀ :: FT diff --git a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl index 71fa9bd8e4..56933e77e9 100644 --- a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl +++ b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl @@ -178,7 +178,8 @@ function implicit_step!(field::Field, diffusivity_fields, tracer_index, clock, - Δt) + Δt; + kwargs...) loc = location(field) @@ -208,6 +209,6 @@ function implicit_step!(field::Field, return solve!(field, implicit_solver, field, # ivd_*_diagonal gets called with these args after (i, j, k, grid): - vi_closure, vi_diffusivity_fields, tracer_index, map(ℓ -> ℓ(), loc)..., clock, Δt, κz) + vi_closure, vi_diffusivity_fields, tracer_index, map(ℓ -> ℓ(), loc)..., clock, Δt, κz; kwargs...) end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 6c6543692c..761c8d23a6 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -1,6 +1,6 @@ module Utils -export launch_config, work_layout, launch!, KernelParameters +export configured_kernel, work_layout, launch!, KernelParameters export prettytime, pretty_filesize export tupleit, parenttuple, datatuple, datatuples export validate_intervals, time_to_run diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 5a2589c5e1..b0d3338669 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -7,7 +7,6 @@ using Oceananigans.Utils using Oceananigans.Grids using Oceananigans.Grids: AbstractGrid - import Base struct KernelParameters{S, O} end @@ -106,8 +105,7 @@ function work_layout(grid, workdims::Symbol; include_right_boundaries=false, loc return workgroup, worksize end -@inline active_cells_work_layout(workgroup, worksize, only_active_cells, grid) = workgroup, worksize -@inline use_only_active_interior_cells(grid) = nothing +@inline active_cells_work_layout(workgroup, worksize, active_cells_map, grid) = workgroup, worksize """ launch!(arch, grid, layout, kernel!, args...; kwargs...) @@ -119,9 +117,55 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; include_right_boundaries = false, reduced_dimensions = (), location = nothing, - only_active_cells = nothing, + active_cells_map = nothing, kwargs...) + loop! = configured_kernel(arch, grid, workspec, kernel!; + include_right_boundaries, + reduced_dimensions, + location, + active_cells_map, + kwargs...) + + + !isnothing(loop!) && loop!(kernel_args...) + + return nothing +end + + +""" + configured_kernel(arch, grid, workspec, kernel!; include_right_boundaries=false, reduced_dimensions=(), location=nothing, active_cells_map=nothing, kwargs...) + +Configures a kernel with the specified architecture, grid, workspec, and kernel function. +If `active_cells_map` is provided, the work distribution is adjusted to include only the active cells. +If the worksize is 0 after adjusting for active cells, the function returns `nothing`. + +# Arguments +============ + +- `arch`: The architecture on which the kernel will be launched. +- `grid`: The grid on which the kernel will be executed. +- `workspec`: The workspec that defines the work distribution. +- `kernel!`: The kernel function to be executed. + +# Keyword Arguments +==================== + +- `include_right_boundaries`: A boolean indicating whether to include right boundaries `(N + 1)`. Default is `false`. +- `reduced_dimensions`: A tuple specifying the dimensions to be reduced in the work distribution. Default is an empty tuple. +- `location`: The location of the kernel execution, needed for `include_right_boundaries`. Default is `nothing`. +- `active_cells_map`: A map indicating the active cells in the grid. If the map is not a nothing, the workspec will be disregarded and + the kernel is configured as a linear kernel with a worksize equal to the length of the active cell map. Default is `nothing`. +""" + +function configured_kernel(arch, grid, workspec, kernel!; + include_right_boundaries = false, + reduced_dimensions = (), + location = nothing, + active_cells_map = nothing, + kwargs...) + workgroup, worksize = work_layout(grid, workspec; include_right_boundaries, reduced_dimensions, @@ -129,26 +173,23 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; offset = offsets(workspec) - if !isnothing(only_active_cells) - workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) + if !isnothing(active_cells_map) + workgroup, worksize = active_cells_work_layout(workgroup, worksize, active_cells_map, grid) offset = nothing - end - if worksize == 0 - return nothing + # A non active domain! + if worksize == 0 + return nothing + end end - + # We can only launch offset kernels with Static sizes!!!! loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) - @debug "Launching kernel $kernel! with worksize $worksize and offsets $offset from $workspec" - - loop!(kernel_args...) - - return nothing + return loop! end - + # When dims::Val @inline launch!(arch, grid, ::Val{workspec}, args...; kwargs...) where workspec = launch!(arch, grid, workspec, args...; kwargs...) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index ccb8aeb005..b4e7a53b50 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -1,7 +1,7 @@ include("dependencies_for_runtests.jl") using Oceananigans.Models.HydrostaticFreeSurfaceModels import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings, split_explicit_free_surface_substep! +import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings, iterate_split_explicit! using Oceananigans.Models.HydrostaticFreeSurfaceModels: constant_averaging_kernel using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calculate_adaptive_settings @@ -12,51 +12,35 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc for arch in archs topology = (Periodic, Periodic, Bounded) - Nx, Ny, Nz = 128, 64, 16 - Lx = Ly = Lz = 2π + Nx, Ny, Nz = 128, 64, 1 + Lx = Ly = 2π + Lz = 1 / Oceananigans.BuoyancyModels.g_Earth grid = RectilinearGrid(arch, FT; topology, size = (Nx, Ny, Nz), x = (0, Lx), y = (0, Ly), z = (-Lz, 0), halo=(1, 1, 1)) - settings = SplitExplicitSettings(; substeps = 200, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(eltype(grid); substeps = 200, averaging_kernel = constant_averaging_kernel) sefs = SplitExplicitFreeSurface(grid; settings) sefs.η .= 0 @testset " One timestep test " begin state = sefs.state - auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ - g = sefs.gravitational_acceleration - Hᶠᶜ .= 1 / g - Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 Δτ = 1.0 η₀(x, y, z) = sin(x) set!(η, η₀) - U₀(x, y) = 0 - set!(U, U₀) - V₀(x, y) = 0 - set!(V, V₀) - - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 - Gᵁ .= 0 - Gⱽ .= 0 - - Nsubsteps = calculate_substeps(settings.substepping, 1) + + Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, 1) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) + U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] U_exact = (reshape(-cos.(grid.xᶠᵃᵃ), (length(grid.xᶜᵃᵃ), 1)).+reshape(0 * grid.yᵃᶜᵃ, (1, length(grid.yᵃᶜᵃ))))[2:Nx+1, 2:Ny+1] @@ -68,50 +52,43 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g - sefs.auxiliary.Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 - T = 2π + T = 2π Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy Nt = floor(Int, T / Δτ) Δτ_end = T - Nt * Δτ settings = SplitExplicitSettings(; substeps = Nt, averaging_kernel = constant_averaging_kernel) + sefs = sefs(settings) # set!(η, f(x,y)) η₀(x, y, z) = sin(x) set!(η, η₀) - U₀(x, y) = 0 + U₀(x, y, z) = 0 set!(U, U₀) - V₀(x, y) = 0 + V₀(x, y, z) = 0 set!(V, V₀) - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 + η̅ .= 0 + U̅ .= 0 + V̅ .= 0 Gᵁ .= 0 Gⱽ .= 0 - Nsubsteps = calculate_substeps(settings.substepping, 1) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - + weights = settings.substepping.averaging_weights + for i in 1:Nt - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, settings, weights, arch, grid, g, Δτ, i) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) end - - # + correction for exact time - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, settings, weights, arch, grid, g, Δτ_end, 1) - - U_computed = Array(parent(U))[2:Nx+1, 2:Ny+1] - η_computed = Array(parent(η))[2:Nx+1, 2:Ny+1] + iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) + + U_computed = Array(deepcopy(interior(U))) + η_computed = Array(deepcopy(interior(η))) set!(η, η₀) set!(U, U₀) - U_exact = Array(parent(U))[2:Nx+1, 2:Ny+1] - η_exact = Array(parent(η))[2:Nx+1, 2:Ny+1] + U_exact = Array(deepcopy(interior(U))) + η_exact = Array(deepcopy(interior(η))) @test maximum(abs.(U_computed - U_exact)) < 1e-3 @show maximum(abs.(η_computed)) @@ -119,6 +96,11 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc @test maximum(abs.(η_computed - η_exact)) < max(100eps(FT), 1e-6) end + settings = SplitExplicitSettings(eltype(grid); substeps = 200, averaging_kernel = constant_averaging_kernel) + sefs = SplitExplicitFreeSurface(grid; settings) + + sefs.η .= 0 + @testset "Averaging / Do Nothing test " begin state = sefs.state auxiliary = sefs.auxiliary @@ -126,46 +108,40 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g - sefs.auxiliary.Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 - Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy + Δτ = 2π / maximum([Nx, Ny]) * 1e-2 # the last factor is essentially the order of accuracy # set!(η, f(x, y)) η_avg = 1 U_avg = 2 V_avg = 3 - η₀(x, y, z) = η_avg - set!(η, η₀) - U₀(x, y) = U_avg - set!(U, U₀) - V₀(x, y) = V_avg - set!(V, V₀) + fill!(η, η_avg) + fill!(U, U_avg) + fill!(V, V_avg) + + fill!(η̅ , 0) + fill!(U̅ , 0) + fill!(V̅ , 0) + fill!(Gᵁ, 0) + fill!(Gⱽ, 0) - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 - Gᵁ .= 0 - Gⱽ .= 0 settings = sefs.settings Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - for i in 1:Nsubsteps - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, i) + + for step in 1:Nsubsteps + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) end - U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] - V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] - η_computed = Array(η.data.parent)[2:Nx+1, 2:Ny+1] + U_computed = Array(deepcopy(interior(U))) + V_computed = Array(deepcopy(interior(V))) + η_computed = Array(deepcopy(interior(η))) - U̅_computed = Array(U̅.data.parent)[2:Nx+1, 2:Ny+1] - V̅_computed = Array(V̅.data.parent)[2:Nx+1, 2:Ny+1] - η̅_computed = Array(η̅.data.parent)[2:Nx+1, 2:Ny+1] + U̅_computed = Array(deepcopy(interior(U̅))) + V̅_computed = Array(deepcopy(interior(V̅))) + η̅_computed = Array(deepcopy(interior(η̅))) tolerance = 100eps(FT) @@ -197,8 +173,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ η = sefs.η g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g # to make life easy - sefs.auxiliary.Hᶜᶠ .= 1 / g # to make life easy # set!(η, f(x,y)) k² = ω² gu_c = 1 @@ -216,47 +190,44 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ .= gu_c Gⱽ .= gv_c - settings = SplitExplicitSettings(substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(eltype(grid); substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) - Nsubsteps = calculate_substeps(settings.substepping, 1) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - + weights = settings.substepping.averaging_weights for i in 1:Nt - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, i) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) end - # + correction for exact time - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ_end, Nt + 1) + iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) η_mean_after = mean(Array(interior(η))) tolerance = 10eps(FT) @test abs(η_mean_after - η_mean_before) < tolerance - η_computed = Array(η.data.parent)[2:Nx+1, 2:Ny+1] - U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] - V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] + η_computed = Array(deepcopy(interior(η, :, 1, 1))) + U_computed = Array(deepcopy(interior(U, :, 1, 1))) + V_computed = Array(deepcopy(interior(V, :, 1, 1))) - η̅_computed = Array(η̅.data.parent)[2:Nx+1, 2:Ny+1] - U̅_computed = Array(U̅.data.parent)[2:Nx+1, 2:Ny+1] - V̅_computed = Array(V̅.data.parent)[2:Nx+1, 2:Ny+1] + η̅_computed = Array(deepcopy(interior(η̅, :, 1, 1))) + U̅_computed = Array(deepcopy(interior(U̅, :, 1, 1))) + V̅_computed = Array(deepcopy(interior(V̅, :, 1, 1))) set!(η, η₀) # ∂ₜₜ(η) = Δη - η_exact = cos(ω * T) * (Array(η.data.parent)[2:Nx+1, 2:Ny+1] .- 1) .+ 1 + η_exact = cos(ω * T) * (Array(interior(η, :, 1, 1)) .- 1) .+ 1 - U₀(x, y) = kx * cos(kx * x) * sin(ky * y) # ∂ₜU = - ∂x(η), since we know η + U₀(x, y, z) = kx * cos(kx * x) * sin(ky * y) # ∂ₜU = - ∂x(η), since we know η set!(U, U₀) - U_exact = -(sin(ω * T) * 1 / ω) .* Array(U.data.parent)[2:Nx+1, 2:Ny+1] .+ gu_c * T + U_exact = -(sin(ω * T) * 1 / ω) .* Array(interior(U, :, 1, 1)) .+ gu_c * T - V₀(x, y) = ky * sin(kx * x) * cos(ky * y) # ∂ₜV = - ∂y(η), since we know η + V₀(x, y, z) = ky * sin(kx * x) * cos(ky * y) # ∂ₜV = - ∂y(η), since we know η set!(V, V₀) - V_exact = -(sin(ω * T) * 1 / ω) .* Array(V.data.parent)[2:Nx+1, 2:Ny+1] .+ gv_c * T + V_exact = -(sin(ω * T) * 1 / ω) .* Array(interior(V, :, 1, 1)) .+ gv_c * T - η̅_exact = (sin(ω * T) / ω - sin(ω * 0) / ω) / T * (Array(η.data.parent)[2:Nx+1, 2:Ny+1] .- 1) .+ 1 - U̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(U.data.parent)[2:Nx+1, 2:Ny+1] .+ gu_c * T / 2 - V̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(V.data.parent)[2:Nx+1, 2:Ny+1] .+ gv_c * T / 2 + η̅_exact = (sin(ω * T) / ω - sin(ω * 0) / ω) / T * (Array(interior(η, :, 1, 1)) .- 1) .+ 1 + U̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(interior(U, :, 1, 1)) .+ gu_c * T / 2 + V̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(interior(V, :, 1, 1)) .+ gv_c * T / 2 tolerance = 1e-2 diff --git a/test/test_split_explicit_vertical_integrals.jl b/test/test_split_explicit_vertical_integrals.jl index aad8fbd8d6..6016053926 100644 --- a/test/test_split_explicit_vertical_integrals.jl +++ b/test/test_split_explicit_vertical_integrals.jl @@ -1,8 +1,8 @@ include("dependencies_for_runtests.jl") using Oceananigans.Models.HydrostaticFreeSurfaceModels -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings -import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode!, barotropic_split_explicit_corrector!, initialize_free_surface_state! +using Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface +using Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings +using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode!, barotropic_split_explicit_corrector!, initialize_free_surface_state! @testset "Barotropic Kernels" begin @@ -17,7 +17,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode grid = RectilinearGrid(arch, topology = topology, size = (Nx, Ny, Nz), x = (0, Lx), y = (0, Ly), z = (-Lz, 0)) tmp = SplitExplicitFreeSurface(; substeps = 200) - sefs = SplitExplicitState(grid) + sefs = SplitExplicitState(grid, tmp.settings.timestepper) sefs = SplitExplicitAuxiliaryFields(grid) sefs = SplitExplicitFreeSurface(grid) @@ -25,7 +25,6 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ u = Field{Face,Center,Center}(grid) v = Field{Center,Face,Center}(grid) @@ -34,7 +33,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode # set equal to something else η̅ .= U̅ .= V̅ .= 1.0 # now set equal to zero - initialize_free_surface_state!(sefs.state, sefs.η) + initialize_free_surface_state!(sefs.state, sefs.η, sefs.settings.timestepper) # don't forget the ghost points fill_halo_regions!(η̅) fill_halo_regions!(U̅) @@ -51,7 +50,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode Δz .= grid.Δzᵃᵃᶠ set_u_check(x, y, z) = cos((π / 2) * z / Lz) - set_U_check(x, y) = (sin(0) - (-2 * Lz / (π))) + set_U_check(x, y, z) = (sin(0) - (-2 * Lz / (π))) set!(u, set_u_check) exact_U = similar(U) set!(exact_U, set_U_check) @@ -60,7 +59,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all((Array(interior(U) .- interior(exact_U))) .< tolerance) set_v_check(x, y, z) = sin(x * y) * cos((π / 2) * z / Lz) - set_V_check(x, y) = sin(x * y) * (sin(0) - (-2 * Lz / (π))) + set_V_check(x, y, z) = sin(x * y) * (sin(0) - (-2 * Lz / (π))) set!(v, set_v_check) exact_V = similar(V) set!(exact_V, set_V_check) @@ -83,7 +82,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all(Array(interior(U)) .≈ Lz) set_u_check(x, y, z) = sin(x) - set_U_check(x, y) = sin(x) * Lz + set_U_check(x, y, z) = sin(x) * Lz set!(u, set_u_check) exact_U = similar(U) set!(exact_U, set_U_check) @@ -91,7 +90,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all(Array(interior(U)) .≈ Array(interior(exact_U))) set_v_check(x, y, z) = sin(x) * z * cos(y) - set_V_check(x, y) = -sin(x) * Lz^2 / 2.0 * cos(y) + set_V_check(x, y, z) = -sin(x) * Lz^2 / 2.0 * cos(y) set!(v, set_v_check) exact_V = similar(V) set!(exact_V, set_V_check) @@ -116,7 +115,6 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ u = Field{Face,Center,Center}(grid) v = Field{Center,Face,Center}(grid) @@ -124,22 +122,19 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode v_corrected = similar(v) set_u(x, y, z) = z + Lz / 2 + sin(x) - set_U̅(x, y) = cos(x) * Lz + set_U̅(x, y, z) = cos(x) * Lz set_u_corrected(x, y, z) = z + Lz / 2 + cos(x) set!(u, set_u) set!(U̅, set_U̅) set!(u_corrected, set_u_corrected) set_v(x, y, z) = (z + Lz / 2) * sin(y) + sin(x) - set_V̅(x, y) = (cos(x) + x) * Lz + set_V̅(x, y, z) = (cos(x) + x) * Lz set_v_corrected(x, y, z) = (z + Lz / 2) * sin(y) + cos(x) + x set!(v, set_v) set!(V̅, set_V̅) set!(v_corrected, set_v_corrected) - sefs.auxiliary.Hᶠᶜ .= Lz - sefs.auxiliary.Hᶜᶠ .= Lz - Δz = zeros(Nz) Δz .= grid.Δzᵃᵃᶜ diff --git a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl index 5fbe1c766a..1358bb59cf 100644 --- a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl +++ b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl @@ -6,6 +6,8 @@ using Statistics using Oceananigans.BoundaryConditions using Oceananigans.DistributedComputations using Random +using JLD2 +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, active_interior_map # Run with # @@ -13,10 +15,11 @@ using Random # mpiexec -n 4 julia --project distributed_hydrostatic_turbulence.jl # ``` -function run_simulation(nx, ny, arch, topo) - grid = RectilinearGrid(arch; topology=topo, size=(nx, ny, 1), extent=(4π, 4π, 0.5), halo=(7, 7, 7)) +function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded)) + grid = RectilinearGrid(arch; topology, size = (Nx, Ny, 10), extent=(4π, 4π, 0.5), halo=(8, 8, 8)) + bottom(x, y) = (x > π && x < 3π/2 && y > π/2 && y < 3π/2) ? 1.0 : - grid.Lz - 1.0 - grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom)) + grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom); active_cells_map = true) model = HydrostaticFreeSurfaceModel(; grid, momentum_advection = VectorInvariant(vorticity_scheme=WENO(order=9)), @@ -38,9 +41,9 @@ function run_simulation(nx, ny, arch, topo) set!(c, mask) u, v, _ = model.velocities - ζ = VerticalVorticityField(model) + # ζ = VerticalVorticityField(model) η = model.free_surface.η - outputs = merge(model.velocities, model.tracers, (; ζ, η)) + outputs = merge(model.velocities, model.tracers) progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time), Δt: $(sim.Δt)" simulation = Simulation(model, Δt=0.02, stop_time=100.0) @@ -60,17 +63,13 @@ function run_simulation(nx, ny, arch, topo) MPI.Barrier(arch.communicator) end -topo = (Periodic, Periodic, Bounded) +Nx = 32 +Ny = 32 -# Use non-uniform partitioning in x, y. -# TODO: Explain what local_index is. -nx = [90, 128-90][arch.local_index[1]] -ny = [56, 128-56][arch.local_index[2]] -@show arch.local_index -arch = Distributed(CPU(), topology = topo, ranks=(2, 2, 1)) +arch = Distributed(CPU(), partition = Partition(2, 2)) # Run the simulation -run_simulation(nx, ny, arch, topo) +run_simulation(Nx, Ny, arch) # Visualize the plane # Produce a video for variable `var` @@ -109,7 +108,6 @@ try if MPI.Comm_rank(MPI.COMM_WORLD) == 0 visualize_simulation("u") visualize_simulation("v") - visualize_simulation("ζ") visualize_simulation("c") end catch err