From ee1b34ef46b3165ffce9efe4bfd3288c0666bb0d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 09:28:09 -0500 Subject: [PATCH 001/138] full interior map --- src/ImmersedBoundaries/ImmersedBoundaries.jl | 16 ++- src/ImmersedBoundaries/active_cells_map.jl | 46 +++++--- .../hydrostatic_free_surface_ab2_step.jl | 21 +++- .../split_explicit_free_surface_kernels.jl | 104 ++++++++++++++++-- src/Solvers/batched_tridiagonal_solver.jl | 14 ++- src/TimeSteppers/quasi_adams_bashforth_2.jl | 16 ++- src/TimeSteppers/store_tendencies.jl | 12 +- .../vertically_implicit_diffusion_solver.jl | 4 +- .../inertial_particles.jl | 66 +++++++++++ 9 files changed, 255 insertions(+), 44 deletions(-) create mode 100644 validation/lagrangian_particles/inertial_particles.jl diff --git a/src/ImmersedBoundaries/ImmersedBoundaries.jl b/src/ImmersedBoundaries/ImmersedBoundaries.jl index 49d49d3934..f7e8d3f429 100644 --- a/src/ImmersedBoundaries/ImmersedBoundaries.jl +++ b/src/ImmersedBoundaries/ImmersedBoundaries.jl @@ -102,18 +102,19 @@ abstract type AbstractImmersedBoundary end ##### ImmersedBoundaryGrid ##### -struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, Arch} <: AbstractGrid{FT, TX, TY, TZ, Arch} +struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, S, Arch} <: AbstractGrid{FT, TX, TY, TZ, Arch} architecture :: Arch underlying_grid :: G immersed_boundary :: I interior_active_cells :: M - + surface_active_cells :: S + # Internal interface - function ImmersedBoundaryGrid{TX, TY, TZ}(grid::G, ib::I, mi::M) where {TX, TY, TZ, G <: AbstractUnderlyingGrid, I, M} + function ImmersedBoundaryGrid{TX, TY, TZ}(grid::G, ib::I, mi::M, ms::S) where {TX, TY, TZ, G <: AbstractUnderlyingGrid, I, M, S} FT = eltype(grid) arch = architecture(grid) Arch = typeof(arch) - return new{FT, TX, TY, TZ, G, I, M, Arch}(arch, grid, ib, mi) + return new{FT, TX, TY, TZ, G, I, M, S, Arch}(arch, grid, ib, mi, ms) end # Constructor with no active map @@ -121,7 +122,7 @@ struct ImmersedBoundaryGrid{FT, TX, TY, TZ, G, I, M, Arch} <: AbstractGrid{FT, T FT = eltype(grid) arch = architecture(grid) Arch = typeof(arch) - return new{FT, TX, TY, TZ, G, I, Nothing, Arch}(arch, grid, ib, nothing) + return new{FT, TX, TY, TZ, G, I, Nothing, Nothing, Arch}(arch, grid, ib, nothing, nothing) end end @@ -141,7 +142,10 @@ const IBG = ImmersedBoundaryGrid @inline z_domain(ibg::IBG) = z_domain(ibg.underlying_grid) Adapt.adapt_structure(to, ibg::IBG{FT, TX, TY, TZ}) where {FT, TX, TY, TZ} = - ImmersedBoundaryGrid{TX, TY, TZ}(adapt(to, ibg.underlying_grid), adapt(to, ibg.immersed_boundary), adapt(to, ibg.interior_active_cells)) + ImmersedBoundaryGrid{TX, TY, TZ}(adapt(to, ibg.underlying_grid), + adapt(to, ibg.immersed_boundary), + adapt(to, ibg.interior_active_cells), + adapt(to, ibg.surface_active_cells)) with_halo(halo, ibg::ImmersedBoundaryGrid) = ImmersedBoundaryGrid(with_halo(halo, ibg.underlying_grid), ibg.immersed_boundary) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index f3ce915df7..0369198ebe 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -6,21 +6,24 @@ using KernelAbstractions: @kernel, @index import Oceananigans.Utils: active_cells_work_layout, use_only_active_interior_cells -const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! +using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection + +const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} struct InteriorMap end struct SurfaceMap end -@inline use_only_active_interior_cells(grid::ActiveCellsIBG) = InteriorMap() - -@inline use_only_active_surface_cells(grid::AbstractGrid) = nothing -@inline use_only_active_surface_cells(grid::ActiveCellsIBG) = SurfaceMap() +@inline use_only_active_surface_cells(::AbstractGrid) = nothing +@inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() +@inline use_only_active_surface_cells(::ActiveSurfaceIBG) = SurfaceMap() -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) -@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveCellsIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) +@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) -@inline active_linear_index_to_interior_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) -@inline active_linear_index_to_surface_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.surface_active_cells[idx]) +@inline active_linear_index_to_interior_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) +@inline active_linear_index_to_surface_tuple(idx, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) @@ -29,17 +32,18 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) # Create the cells map on the CPU, then switch it to the GPU if active_cells_map - map_interior = active_cells_map_interior(ibg) - map_interior = arch_array(architecture(ibg), map_interior) - # map_surface = active_cells_map_surface(ibg) - # map_surface = arch_array(architecture(ibg), map_surface) + interior_map = active_cells_interior_map(ibg) + interior_map = arch_array(architecture(ibg), interior_map) + surface_map = active_cells_surface_map(ibg) + surface_map = arch_array(architecture(ibg), surface_map) else - map_interior = nothing + interior_map = nothing end return ImmersedBoundaryGrid{TX, TY, TZ}(ibg.underlying_grid, ibg.immersed_boundary, - map_interior) + interior_map, + surface_map) end @inline active_cell(i, j, k, ibg) = !immersed_cell(i, j, k, ibg) @@ -65,7 +69,7 @@ const MAXUInt8 = 2^8 - 1 const MAXUInt16 = 2^16 - 1 const MAXUInt32 = 2^32 - 1 -function active_cells_map_interior(ibg) +function active_cells_interior_map(ibg) active_cells_field = compute_interior_active_cells(ibg) N = maximum(size(ibg)) @@ -105,7 +109,7 @@ end # If we eventually want to perform also barotropic step, `w` computation and `p` # computation only on active `columns` -function active_cells_map_surface(ibg) +function active_cells_surface_map(ibg) active_cells_field = compute_surface_active_cells(ibg) interior_cells = arch_array(CPU(), interior(active_cells_field, :, :, 1)) @@ -119,3 +123,11 @@ function active_cells_map_surface(ibg) return smaller_indices end + +@kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid::ActiveSurfaceIBG, p, args, tridiagonal_direction::ZDirection) + idx = @index(Global, Linear) + i, j = active_linear_index_to_surface_tuple(idx, grid) + Nz = size(grid, 3) + + solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end \ No newline at end of file diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index 509b1fe0ea..24c58ff2b4 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -2,6 +2,8 @@ using Oceananigans.Fields: location using Oceananigans.TimeSteppers: ab2_step_field! using Oceananigans.TurbulenceClosures: implicit_step! +using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells, use_only_active_surface_cells + import Oceananigans.TimeSteppers: ab2_step! ##### @@ -35,13 +37,18 @@ end function ab2_step_velocities!(velocities, model, Δt, χ) + only_active_interior_cells = use_only_active_interior_cells(model.grid) + only_active_surface_cells = use_only_active_surface_cells(model.grid) + for (i, name) in enumerate((:u, :v)) Gⁿ = model.timestepper.Gⁿ[name] G⁻ = model.timestepper.G⁻[name] velocity_field = model.velocities[name] + launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻) + ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻; + only_active_cells = only_active_interior_cells) # TODO: let next implicit solve depend on previous solve + explicit velocity step # Need to distinguish between solver events and tendency calculation events. @@ -52,7 +59,8 @@ function ab2_step_velocities!(velocities, model, Δt, χ) model.diffusivity_fields, nothing, model.clock, - Δt) + Δt; + only_active_cells = only_active_surface_cells) end return nothing @@ -68,6 +76,9 @@ ab2_step_tracers!(::EmptyNamedTuple, model, Δt, χ) = nothing function ab2_step_tracers!(tracers, model, Δt, χ) + only_active_interior_cells = use_only_active_interior_cells(model.grid) + only_active_surface_cells = use_only_active_surface_cells(model.grid) + # Tracer update kernels for (tracer_index, tracer_name) in enumerate(propertynames(tracers)) Gⁿ = model.timestepper.Gⁿ[tracer_name] @@ -76,7 +87,8 @@ function ab2_step_tracers!(tracers, model, Δt, χ) closure = model.closure launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻) + ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻; + only_active_cells = only_active_interior_cells) implicit_step!(tracer_field, model.timestepper.implicit_solver, @@ -84,7 +96,8 @@ function ab2_step_tracers!(tracers, model, Δt, χ) model.diffusivity_fields, Val(tracer_index), model.clock, - Δt) + Δt; + only_active_cells = only_active_surface_cells) end return nothing diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 1062cd43c9..754ffa362b 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -7,7 +7,8 @@ using Oceananigans.BoundaryConditions using Oceananigans.Operators using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f -using Oceananigans.ImmersedBoundaries: mask_immersed_field! +using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells +using Oceananigans.ImmersedBoundaries: active_linear_index_to_surface_tuple, ActiveCellsIBG, ActiveSurfaceIBG # constants for AB3 time stepping scheme (from https://doi.org/10.1016/j.ocemod.2004.08.002) const β = 0.281105 @@ -141,8 +142,20 @@ using Printf Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) - k_top = grid.Nz+1 + free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +end + +@kernel function split_explicit_free_surface_evolution_kernel!(grid::ActiveSurfaceIBG, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) + idx = @index(Global, Linear) + i, j = active_linear_index_to_surface_tuple(idx, grid) + free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +end +@inline function free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + k_top = grid.Nz+1 TX, TY, _ = topology(grid) @inbounds begin @@ -151,6 +164,8 @@ using Printf η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²)) end + + return nothing end @kernel function split_explicit_barotropic_velocity_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², @@ -158,6 +173,31 @@ end Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) + + velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper ) +end + + +@kernel function split_explicit_barotropic_velocity_evolution_kernel!(grid::ActiveSurfaceIBG, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) + idx = @index(Global, Linear) + i, j = active_linear_index_to_surface_tuple(idx, grid) + + velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper ) +end + +@inline function velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper ) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @@ -195,8 +235,10 @@ function split_explicit_free_surface_substep!(η, state, auxiliary, settings, we η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...) - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...) + launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...; + only_active_cells = use_only_active_surface_cells(grid)) + launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...; + only_active_cells = use_only_active_surface_cells(grid)) return nothing end @@ -216,9 +258,26 @@ end end end +# Barotropic Model Kernels +# u_Δz = u * Δz +@kernel function _barotropic_mode_kernel!(U, V, grid::ActiveSurfaceIBG, u, v) + idx = @index(Global, Linear) + i, j = active_linear_index_to_surface_tuple(idx, grid) + + # hand unroll first loop + @inbounds U[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] + @inbounds V[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] + + @unroll for k in 2:grid.Nz + @inbounds U[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] + @inbounds V[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] + end +end + # may need to do Val(Nk) since it may not be known at compile compute_barotropic_mode!(U, V, grid, u, v) = - launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v) + launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; + only_active_cells = use_only_active_surface_cells(grid)) function initialize_free_surface_state!(free_surface_state, η) state = free_surface_state @@ -243,7 +302,7 @@ function initialize_free_surface_state!(free_surface_state, η) return nothing end -@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ) +@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) i, j, k = @index(Global, NTuple) @inbounds begin u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] @@ -251,6 +310,15 @@ end end end +@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid::ActiveCellsIBG) + idx = @index(Global, Linear) + i, j, k = active_linear_index_to_interior_tuple(idx, grid) + @inbounds begin + u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] + v[i, j, k] = v[i, j, k] + (V̅[i, j] - V[i, j]) / Hᶜᶠ[i, j] + end +end + # may need to do Val(Nk) since it may not be known at compile. Also figure out where to put H function barotropic_split_explicit_corrector!(u, v, free_surface, grid) sefs = free_surface.state @@ -264,7 +332,8 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid) # add in "good" barotropic mode launch!(arch, grid, :xyz, barotropic_split_explicit_corrector_kernel!, - u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ) + u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid; + only_active_cells = use_only_active_interior_cells(grid)) return nothing end @@ -362,6 +431,21 @@ end end end +# Calculate RHS for the barotopic time step. +@kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid::ActiveSurfaceIBG, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) + idx = @index(Global, Linear) + i, j = active_linear_index_to_surface_tuple(idx, grid) + + # hand unroll first loop + @inbounds Gᵁ[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + + @unroll for k in 2:grid.Nz + @inbounds Gᵁ[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) + end +end + @inline ab2_step_Gu(i, j, k, grid, G⁻, Gⁿ, χ::FT) where FT = ifelse(peripheral_node(i, j, k, grid, f, c, c), zero(grid), (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - G⁻[i, j, k] * (convert(FT, 0.5) + χ)) @inline ab2_step_Gv(i, j, k, grid, G⁻, Gⁿ, χ::FT) where FT = ifelse(peripheral_node(i, j, k, grid, c, f, c), zero(grid), (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - G⁻[i, j, k] * (convert(FT, 0.5) + χ)) @@ -388,6 +472,8 @@ function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) end setup_split_explicit_tendency!(auxiliary, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) = - launch!(architecture(grid), grid, :xy, _compute_integrated_ab2_tendencies!, auxiliary.Gᵁ, auxiliary.Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) - + launch!(architecture(grid), grid, :xy, _compute_integrated_ab2_tendencies!, auxiliary.Gᵁ, auxiliary.Gⱽ, grid, + Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ; only_active_cells = use_only_active_surface_cells(grid)) + wait_free_surface_communication!(free_surface, arch) = nothing + diff --git a/src/Solvers/batched_tridiagonal_solver.jl b/src/Solvers/batched_tridiagonal_solver.jl index a188d0fc89..d69ad5ecfc 100644 --- a/src/Solvers/batched_tridiagonal_solver.jl +++ b/src/Solvers/batched_tridiagonal_solver.jl @@ -88,7 +88,7 @@ Reference implementation per Numerical Recipes, Press et al. 1992 (§ 2.4). Note a slightly different notation from Press et al. is used for indexing the off-diagonal elements; see [`BatchedTridiagonalSolver`](@ref). """ -function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...) +function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...; only_active_cells = nothing) launch_config = if solver.tridiagonal_direction isa XDirection :yz @@ -108,7 +108,8 @@ function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...) solver.grid, solver.parameters, Tuple(args), - solver.tridiagonal_direction) + solver.tridiagonal_direction; + only_active_cells) return nothing end @@ -124,7 +125,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::XDirection) Nx = size(grid, 1) j, k = @index(Global, NTuple) + solve_batched_tridiagonal_system_x!(j, k, Nx, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_x!(j, k, Nx, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(1, j, k, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(1, j, k, grid, f, p, tridiagonal_direction, args...) @@ -156,7 +160,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::YDirection) Ny = size(grid, 2) i, k = @index(Global, NTuple) + solve_batched_tridiagonal_system_y!(i, k, Ny, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_y!(i, k, Ny, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(i, 1, k, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(i, 1, k, grid, f, p, tridiagonal_direction, args...) @@ -188,7 +195,10 @@ end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction::ZDirection) Nz = size(grid, 3) i, j = @index(Global, NTuple) + solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +end +@inline function solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) @inbounds begin β = get_coefficient(i, j, 1, grid, b, p, tridiagonal_direction, args...) f₁ = get_coefficient(i, j, 1, grid, f, p, tridiagonal_direction, args...) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index 6c3854e843..21f309148e 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -1,6 +1,7 @@ using Oceananigans.Fields: FunctionField, location using Oceananigans.TurbulenceClosures: implicit_step! using Oceananigans.Utils: @apply_regionally, apply_regionally! +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, active_linear_index_to_interior_tuple mutable struct QuasiAdamsBashforth2TimeStepper{FT, GT, IT} <: AbstractTimeStepper χ :: FT @@ -147,7 +148,7 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method `U^{n+1} = U^n + Δt ((3/2 + χ) * G^{n} - (1/2 + χ) G^{n-1})` """ -@kernel function ab2_step_field!(u, Δt, χ, Gⁿ, G⁻) +@kernel function ab2_step_field!(u, Δt, χ, Gⁿ, G⁻, grid) i, j, k = @index(Global, NTuple) FT = eltype(χ) @@ -157,4 +158,15 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) end -@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻) = nothing +@kernel function ab2_step_field!(u, Δt, χ, Gⁿ, G⁻, grid::ActiveCellsIBG) + idx = @index(Global, Linear) + i, j, k = active_linear_index_to_interior_tuple(idx, grid) + + FT = eltype(χ) + one_point_five = convert(FT, 1.5) + oh_point_five = convert(FT, 0.5) + + @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) +end + +@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index 06d179bd3a..6045494cee 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -9,15 +9,23 @@ using Oceananigans.Utils: launch! @inbounds G⁻[i, j, k] = G⁰[i, j, k] end +""" Store source terms for `u`, `v`, and `w`. """ +@kernel function store_field_tendencies!(G⁻, grid::ActiveCellsIBG, G⁰) + idx = @index(Global, Linear) + i, j, k = active_linear_index_to_interior_tuple(idx, grid) + @inbounds G⁻[i, j, k] = G⁰[i, j, k] +end + """ Store previous source terms before updating them. """ -function store_tendencies!(model) +function store_tendencies!(model; only_active_cells = only_active_interior_cells(model.grid)) model_fields = prognostic_fields(model) for field_name in keys(model_fields) launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], model.grid, - model.timestepper.Gⁿ[field_name]) + model.timestepper.Gⁿ[field_name]; + only_active_cells) end return nothing diff --git a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl index 71fa9bd8e4..f3b3e05c3a 100644 --- a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl +++ b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl @@ -178,7 +178,7 @@ function implicit_step!(field::Field, diffusivity_fields, tracer_index, clock, - Δt) + Δt; kwargs...) loc = location(field) @@ -208,6 +208,6 @@ function implicit_step!(field::Field, return solve!(field, implicit_solver, field, # ivd_*_diagonal gets called with these args after (i, j, k, grid): - vi_closure, vi_diffusivity_fields, tracer_index, map(ℓ -> ℓ(), loc)..., clock, Δt, κz) + vi_closure, vi_diffusivity_fields, tracer_index, map(ℓ -> ℓ(), loc)..., clock, Δt, κz; kwargs...) end diff --git a/validation/lagrangian_particles/inertial_particles.jl b/validation/lagrangian_particles/inertial_particles.jl new file mode 100644 index 0000000000..e6bbfbe9d2 --- /dev/null +++ b/validation/lagrangian_particles/inertial_particles.jl @@ -0,0 +1,66 @@ +using StructArrays +using Oceananigans +using Oceananigans: architecture +using Oceananigans.Models.LagrangianParticleTracking: AbstractParticle +using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_w_from_continuity! +import Oceananigans.Models.LagrangianParticleTracking: particle_u_velocity, particle_v_velocity, particle_w_velocity + +struct InertialParticle{T} <: AbstractParticle + x :: T + y :: T + z :: T + u :: T + v :: T + w :: T + particle_respose_time :: T +end + +# 10 Particles with different inertia +x = ones(10) +y = ones(10) +z = ones(10) +u = zeros(10) +v = zeros(10) +w = zeros(10) + +particle_respose_time = range(0.1, 1.0, length = 10) + +properties = StructArray{InertialParticle}((x, y, z, u, v, w, particle_respose_time)) +particles = LagrangianParticles(properties) + +grid = RectilinearGrid(size = (50, 50, 50), x = (0, 2), y = (0, 2), z = (0, 2), topology = (Periodic, Periodic, Periodic)) + +u_fluid = XFaceField(grid) +v_fluid = YFaceField(grid) +w_fluid = ZFaceField(grid) + +@inline particles_u_velocity(u_fluid, particle, Δt) = particle.u + Δt / particles.particle_respose_time * (u_fluid - particle.u) +@inline particles_v_velocity(v_fluid, particle, Δt) = particle.v + Δt / particles.particle_respose_time * (v_fluid - particle.v) +@inline particles_w_velocity(w_fluid, particle, Δt) = particle.w + Δt / particles.particle_respose_time * (w_fluid - particle.w) + +set!(u_fluid, (x, y, z) -> rand()) +set!(v_fluid, (x, y, z) -> rand()) + +fill_halo_regions!((u_fluid, v_fluid)) + +compute_w_from_continuity!((; u = u_fluid, v = v_fluid, w = w_fluid), architecture(grid), grid) + +velocities = PrescribedVelocityFields(; u = u_fluid, v = v_fluid, w = w_fluid) + +model = HydrostaticFreeSurfaceModel(; grid, + tracers = (), + buoyancy = nothing, + particles, + velocities) + +simulation = Simulation(model, Δt = 1e-2, stop_time = 10) + +particles_save = [deepcopy(properties)] + +save_particles(sim) = + push!(particles_save, deepcopy(sim.model.particles.properties)) + +simulation.callbacks[:particles] = Callback(save_particles, IterationInterval(10)) + +run!(simulation) + From 31ba1d67be0d7ab735b0e551ca723cededbe7504 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 09:55:19 -0500 Subject: [PATCH 002/138] bugfix --- src/TimeSteppers/store_tendencies.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index 6045494cee..d5bae5d376 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -1,6 +1,6 @@ using Oceananigans: prognostic_fields using Oceananigans.Grids: AbstractGrid - +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG using Oceananigans.Utils: launch! """ Store source terms for `u`, `v`, and `w`. """ From 14031619c31c5357d66422431a986071a8059957 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:19:55 -0500 Subject: [PATCH 003/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 0369198ebe..d52e3db5a2 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -38,6 +38,7 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) surface_map = arch_array(architecture(ibg), surface_map) else interior_map = nothing + surface_map = nothing end return ImmersedBoundaryGrid{TX, TY, TZ}(ibg.underlying_grid, From 7d7620362eb46322f2c6c0806dd3b0e574a025d1 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:21:24 -0500 Subject: [PATCH 004/138] bugfixes --- .../hydrostatic_free_surface_ab2_step.jl | 4 ++-- src/TimeSteppers/quasi_adams_bashforth_2.jl | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index 24c58ff2b4..c2a88821af 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -47,7 +47,7 @@ function ab2_step_velocities!(velocities, model, Δt, χ) launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻; + ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻, model.grid; only_active_cells = only_active_interior_cells) # TODO: let next implicit solve depend on previous solve + explicit velocity step @@ -87,7 +87,7 @@ function ab2_step_tracers!(tracers, model, Δt, χ) closure = model.closure launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻; + ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻, model.grid; only_active_cells = only_active_interior_cells) implicit_step!(tracer_field, diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index 21f309148e..c589dfee73 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -125,7 +125,8 @@ function ab2_step!(model, Δt, χ) step_field_kernel!(field, Δt, χ, model.timestepper.Gⁿ[i], - model.timestepper.G⁻[i]) + model.timestepper.G⁻[i], + model.grid) # TODO: function tracer_index(model, field_index) = field_index - 3, etc... tracer_index = Val(i - 3) # assumption From b5d6a4294dabc181cf7bbd652962f41533f7a5e2 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:26:32 -0500 Subject: [PATCH 005/138] hmmm --- ext/OceananigansEnzymeCoreExt.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ext/OceananigansEnzymeCoreExt.jl b/ext/OceananigansEnzymeCoreExt.jl index 9d273f56f1..c43033a0e3 100644 --- a/ext/OceananigansEnzymeCoreExt.jl +++ b/ext/OceananigansEnzymeCoreExt.jl @@ -102,19 +102,19 @@ function EnzymeCore.EnzymeRules.augmented_primal(config, end function EnzymeCore.EnzymeRules.reverse(config::EnzymeCore.EnzymeRules.ConfigWidth{1}, - func::EnzymeCore.Const{typeof(Oceananigans.Utils.launch!)}, - ::Type{EnzymeCore.Const{Nothing}}, - tape, - arch, - grid, - workspec, - kernel!, - kernel_args...; - include_right_boundaries = false, - reduced_dimensions = (), - location = nothing, - only_active_cells = nothing, - kwargs...) + func::EnzymeCore.Const{typeof(Oceananigans.Utils.launch!)}, + ::Type{EnzymeCore.Const{Nothing}}, + tape, + arch, + grid, + workspec, + kernel!, + kernel_args...; + include_right_boundaries = false, + reduced_dimensions = (), + location = nothing, + only_active_cells = nothing, + kwargs...) subrets = if tape !== nothing duploop, subtape = tape From ee62becb940e8df6b29cf0037cd6907212d205af Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:34:43 -0500 Subject: [PATCH 006/138] disambiguate --- .../hydrostatic_free_surface_ab2_step.jl | 1 - src/TimeSteppers/quasi_adams_bashforth_2.jl | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index c2a88821af..6cf8f349db 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -45,7 +45,6 @@ function ab2_step_velocities!(velocities, model, Δt, χ) G⁻ = model.timestepper.G⁻[name] velocity_field = model.velocities[name] - launch!(model.architecture, model.grid, :xyz, ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻, model.grid; only_active_cells = only_active_interior_cells) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index c589dfee73..f8348777f7 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -170,4 +170,5 @@ end @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) end -@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing +@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing +@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid::ActiveCellsIBG) = nothing From e880b2c2853e59872f1009744792daca423ea8dd Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 11:45:28 -0500 Subject: [PATCH 007/138] some organizing --- src/Advection/vector_invariant_advection.jl | 31 ++++++++++----------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/Advection/vector_invariant_advection.jl b/src/Advection/vector_invariant_advection.jl index a16af6f950..c782a934a6 100644 --- a/src/Advection/vector_invariant_advection.jl +++ b/src/Advection/vector_invariant_advection.jl @@ -167,10 +167,7 @@ Base.show(io::IO, a::VectorInvariant{N, FT}) where {N, FT} = ##### Convenience for WENO Vector Invariant ##### -# VectorInvariant{N, FT, M, Z (vorticity scheme), ZS, V (vertical scheme), K (kinetic energy gradient scheme) -const WENOVectorInvariant = VectorInvariant{<:Any, <:Any, <:Any, <:WENO, <:Any, <:WENO, <:WENO} - -nothing_to_default(user_value, default) = isnothing(user_value) ? default : user_value +nothing_to_default(user_value; default) = isnothing(user_value) ? default : user_value """ function WENOVectorInvariant(; upwinding = nothing, @@ -189,23 +186,23 @@ function WENOVectorInvariant(; upwinding = nothing, weno_kw...) if isnothing(order) # apply global defaults - vorticity_order = nothing_to_default(vorticity_order, default=9) - vertical_order = nothing_to_default(vertical_order, default=5) - divergence_order = nothing_to_default(divergence_order, default=5) - kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default=5) + vorticity_order = nothing_to_default(vorticity_order, default = 9) + vertical_order = nothing_to_default(vertical_order, default = 5) + divergence_order = nothing_to_default(divergence_order, default = 5) + kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default = 5) else # apply user supplied `order` unless overridden by more specific value - vorticity_order = nothing_to_default(vorticity_order, default=order) - vertical_order = nothing_to_default(vertical_order, default=order) - divergence_order = nothing_to_default(divergence_order, default=order) - kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default=order) + vorticity_order = nothing_to_default(vorticity_order, default = order) + vertical_order = nothing_to_default(vertical_order, default = order) + divergence_order = nothing_to_default(divergence_order, default = order) + kinetic_energy_gradient_order = nothing_to_default(kinetic_energy_gradient_order, default = order) end - vorticity_scheme = WENO(; order=vorticity_order, weno_kw...) - vertical_scheme = WENO(; order=vertical_order, weno_kw...) - kinetic_energy_gradient_scheme = WENO(; order=kinetic_energy_gradient_order, weno_kw...) - divergence_scheme = WENO(; order=divergence_order, weno_kw...) + vorticity_scheme = WENO(; order = vorticity_order, weno_kw...) + vertical_scheme = WENO(; order = vertical_order, weno_kw...) + kinetic_energy_gradient_scheme = WENO(; order = kinetic_energy_gradient_order, weno_kw...) + divergence_scheme = WENO(; order = divergence_order, weno_kw...) - default_upwinding = OnlySelfUpwinding(cross_scheme=divergence_scheme) + default_upwinding = OnlySelfUpwinding(cross_scheme = divergence_scheme) upwinding = nothing_to_default(upwinding; default = default_upwinding) schemes = (vorticity_scheme, vertical_scheme, kinetic_energy_gradient_scheme, divergence_scheme) From 6f4aaad47a31e7036c41f02d83da9a4f967296f7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 26 Nov 2023 13:24:16 -0500 Subject: [PATCH 008/138] hmmm --- src/ImmersedBoundaries/active_cells_map.jl | 6 +++--- .../vertically_implicit_diffusion_solver.jl | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index d52e3db5a2..c8256b9cf2 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -6,9 +6,10 @@ using KernelAbstractions: @kernel, @index import Oceananigans.Utils: active_cells_work_layout, use_only_active_interior_cells -import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection +import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! + const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} @@ -126,9 +127,8 @@ function active_cells_surface_map(ibg) end @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid::ActiveSurfaceIBG, p, args, tridiagonal_direction::ZDirection) + Nz = size(grid, 3) idx = @index(Global, Linear) i, j = active_linear_index_to_surface_tuple(idx, grid) - Nz = size(grid, 3) - solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) end \ No newline at end of file diff --git a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl index f3b3e05c3a..56933e77e9 100644 --- a/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl +++ b/src/TurbulenceClosures/vertically_implicit_diffusion_solver.jl @@ -178,7 +178,8 @@ function implicit_step!(field::Field, diffusivity_fields, tracer_index, clock, - Δt; kwargs...) + Δt; + kwargs...) loc = location(field) From f0b59a3d34442ec3d24a20e9bd20b901d02a32fd Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 17:44:50 -0500 Subject: [PATCH 009/138] improve speed --- src/Advection/tracer_advection_operators.jl | 24 +++++++++++++++++++++ src/Advection/vector_invariant_advection.jl | 13 ++++++----- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index b01f2471ff..f6bd834af5 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -1,6 +1,24 @@ using Oceananigans.Operators: Vᶜᶜᶜ using Oceananigans.Fields: ZeroField +struct ThreeDimensionalTracerAdvection{N, FT, A, B, C} <: AbstractAdvectionScheme{N, FT} + x :: A + y :: B + z :: C + + ThreeDimensionalTracerAdvection{N, FT}(x::A, y::B, z::C) where {N, FT, A, B, C} = new{N, FT, A, B, C}(x, y, z) +end + +function ThreeDimensionalTracerAdvection(; x, y, z) + Nx = required_halo_size(x) + Ny = required_halo_size(y) + Nz = required_halo_size(z) + + FT = eltype(x) + + return ThreeDimensionalTracerAdvection{max(Nx, Ny, Nz), FT}(x, y, z) +end + @inline _advective_tracer_flux_x(args...) = advective_tracer_flux_x(args...) @inline _advective_tracer_flux_y(args...) = advective_tracer_flux_y(args...) @inline _advective_tracer_flux_z(args...) = advective_tracer_flux_z(args...) @@ -32,3 +50,9 @@ which ends up at the location `ccc`. δyᵃᶜᵃ(i, j, k, grid, _advective_tracer_flux_y, advection, U.v, c) + δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection, U.w, c)) end + +@inline function div_Uc(i, j, k, grid, advection::ThreeDimensionalTracerAdvection, U, c) + return 1/Vᶜᶜᶜ(i, j, k, grid) * (δxᶜᵃᵃ(i, j, k, grid, _advective_tracer_flux_x, advection.x, U.u, c) + + δyᵃᶜᵃ(i, j, k, grid, _advective_tracer_flux_y, advection.y, U.v, c) + + δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection.z, U.w, c)) +end diff --git a/src/Advection/vector_invariant_advection.jl b/src/Advection/vector_invariant_advection.jl index c782a934a6..dde715f7fc 100644 --- a/src/Advection/vector_invariant_advection.jl +++ b/src/Advection/vector_invariant_advection.jl @@ -111,9 +111,9 @@ Vector Invariant, Dimension-by-dimension reconstruction function VectorInvariant(; vorticity_scheme = EnstrophyConserving(), vorticity_stencil = VelocityStencil(), vertical_scheme = EnergyConserving(), - kinetic_energy_gradient_scheme = vertical_scheme, divergence_scheme = vertical_scheme, - upwinding = OnlySelfUpwinding(; cross_scheme = vertical_scheme), + kinetic_energy_gradient_scheme = divergence_scheme, + upwinding = OnlySelfUpwinding(; cross_scheme = divergence_scheme), multi_dimensional_stencil = false) N = required_halo_size(vorticity_scheme) @@ -132,7 +132,6 @@ end const MultiDimensionalVectorInvariant = VectorInvariant{<:Any, <:Any, true} # VectorInvariant{N, FT, M, Z (vorticity scheme) -const MultiDimensionalVectorInvariant = VectorInvariant{<:Any, <:Any, true} const VectorInvariantEnergyConserving = VectorInvariant{<:Any, <:Any, <:Any, <:EnergyConserving} const VectorInvariantEnstrophyConserving = VectorInvariant{<:Any, <:Any, <:Any, <:EnstrophyConserving} const VectorInvariantUpwindVorticity = VectorInvariant{<:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme} @@ -145,10 +144,10 @@ const VectorInvariantKEGradientEnergyConserving = VectorInvariant{<:Any, <:Any, const VectorInvariantKineticEnergyUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme} -# VectorInvariant{N, FT, M, Z, ZS, V, K, D, U (upwinding) -const VectorInvariantCrossVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:CrossAndSelfUpwinding} -const VectorInvariantSelfVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:OnlySelfUpwinding} -const VectorInvariantVelocityVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:Any, <:Any, <:VelocityUpwinding} +# VectorInvariant{N, FT, M, Z, ZS, V, K, D, U (upwinding) +const VectorInvariantCrossVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:CrossAndSelfUpwinding} +const VectorInvariantSelfVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:OnlySelfUpwinding} +const VectorInvariantVelocityVerticalUpwinding = VectorInvariant{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractUpwindBiasedAdvectionScheme, <:VelocityUpwinding} Base.summary(a::VectorInvariant) = string("Vector Invariant, Dimension-by-dimension reconstruction") Base.summary(a::MultiDimensionalVectorInvariant) = string("Vector Invariant, Multidimensional reconstruction") From 5dddbb9e24a2e95ba881c0c7e7169cf3d71d23c1 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 20:44:00 -0500 Subject: [PATCH 010/138] now we get going --- .../store_hydrostatic_free_surface_tendencies.jl | 6 ++++-- src/TimeSteppers/store_tendencies.jl | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl index 469fb62c33..24d26c213c 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl @@ -4,6 +4,7 @@ using Oceananigans.TimeSteppers: store_field_tendencies! using Oceananigans: prognostic_fields using Oceananigans.Grids: AbstractGrid +using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells using Oceananigans.Utils: launch! @@ -27,7 +28,7 @@ function store_free_surface_tendency!(::ExplicitFreeSurface, model) end """ Store previous source terms before updating them. """ -function store_tendencies!(model::HydrostaticFreeSurfaceModel) +function store_tendencies!(model::HydrostaticFreeSurfaceModel; only_active_cells = use_only_active_interior_cells(model.grid)) prognostic_field_names = keys(prognostic_fields(model)) three_dimensional_prognostic_field_names = filter(name -> name != :η, prognostic_field_names) @@ -37,7 +38,8 @@ function store_tendencies!(model::HydrostaticFreeSurfaceModel) store_field_tendencies!, model.timestepper.G⁻[field_name], model.grid, - model.timestepper.Gⁿ[field_name]) + model.timestepper.Gⁿ[field_name]; + only_active_cells) end diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index d5bae5d376..ea700ef1e2 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -17,7 +17,7 @@ end end """ Store previous source terms before updating them. """ -function store_tendencies!(model; only_active_cells = only_active_interior_cells(model.grid)) +function store_tendencies!(model; only_active_cells = use_only_active_interior_cells(model.grid)) model_fields = prognostic_fields(model) for field_name in keys(model_fields) From 90b0f7a7b040e301b1c30d14ae50ce20161571e5 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:36:44 -0500 Subject: [PATCH 011/138] check it out --- src/DistributedComputations/distributed_grids.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index fb5567e91d..2663fc843b 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -41,7 +41,7 @@ end @inline local_sizes(N, R::Fractional) = Tuple(ceil(Int, N * r) for r in R.sizes) @inline function local_sizes(N, R::Sizes) if N != sum(R.sizes) - @warn "The domain size specified in the architecture $(R.sizes) is inconsistent + @warn "The domain size specified in the architecture $(sum(R.sizes)) is inconsistent with the grid size $N: using the architecture-specified size" end return R.sizes @@ -130,6 +130,8 @@ function LatitudeLongitudeGrid(arch::Distributed, φl = partition(latitude, nφ, arch, 2) zl = partition(z, nz, arch, 3) + @info arch.local_rank longitude latitude λl φl + # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, # it is stretched if being passed is a function or vector (as for the VerticallyStretchedRectilinearGrid) From 6ce5b45de5fba6475a32fbfb36092f023b45e6ef Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:47:19 -0500 Subject: [PATCH 012/138] check bathymetry --- src/DistributedComputations/distributed_fields.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index ebffdf4f76..514a7434c9 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -39,6 +39,7 @@ end function set!(u::DistributedField, v::Union{Array, CuArray}) gsize = global_size(architecture(u), size(u)) + @show gsize size(v) size(u) if size(v) == size(u) f = arch_array(architecture(u), v) u .= f From 2875984f6523fe3d81c9149a695b49b92c56548f Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:49:49 -0500 Subject: [PATCH 013/138] fixit --- .../distributed_fields.jl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index 514a7434c9..878646662e 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -39,17 +39,19 @@ end function set!(u::DistributedField, v::Union{Array, CuArray}) gsize = global_size(architecture(u), size(u)) - @show gsize size(v) size(u) - if size(v) == size(u) - f = arch_array(architecture(u), v) - u .= f - return u - elseif size(v) == gsize + if size(v) == gsize f = partition_global_array(architecture(u), v, size(u)) u .= f return u else - throw(ArgumentError("ERROR: DimensionMismatch: array could not be set to match destination field")) + try + f = arch_array(architecture(u), v) + u .= f + return u + + catch + throw(ArgumentError("ERROR: DimensionMismatch: array could not be set to match destination field")) + end end end From 6cd7444160e37d918f9b8ad200f9189f77a62718 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:51:36 -0500 Subject: [PATCH 014/138] rmove distributed --- src/DistributedComputations/distributed_grids.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 2663fc843b..27064906bd 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -130,8 +130,6 @@ function LatitudeLongitudeGrid(arch::Distributed, φl = partition(latitude, nφ, arch, 2) zl = partition(z, nz, arch, 3) - @info arch.local_rank longitude latitude λl φl - # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, # it is stretched if being passed is a function or vector (as for the VerticallyStretchedRectilinearGrid) From 8319dbcdfd1f04fd3824f60d0e8224b38213c0e4 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Dec 2023 14:27:48 -0500 Subject: [PATCH 015/138] test it like this --- src/ImmersedBoundaries/active_cells_map.jl | 12 ++++++------ .../hydrostatic_free_surface_ab2_step.jl | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index c8256b9cf2..e9c097fda7 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -126,9 +126,9 @@ function active_cells_surface_map(ibg) return smaller_indices end -@kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid::ActiveSurfaceIBG, p, args, tridiagonal_direction::ZDirection) - Nz = size(grid, 3) - idx = @index(Global, Linear) - i, j = active_linear_index_to_surface_tuple(idx, grid) - solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) -end \ No newline at end of file +# @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid::ActiveSurfaceIBG, p, args, tridiagonal_direction::ZDirection) +# Nz = size(grid, 3) +# idx = @index(Global, Linear) +# i, j = active_linear_index_to_surface_tuple(idx, grid) +# solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) +# end \ No newline at end of file diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index 6cf8f349db..a0709d9d72 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -58,8 +58,7 @@ function ab2_step_velocities!(velocities, model, Δt, χ) model.diffusivity_fields, nothing, model.clock, - Δt; - only_active_cells = only_active_surface_cells) + Δt) end return nothing @@ -95,8 +94,7 @@ function ab2_step_tracers!(tracers, model, Δt, χ) model.diffusivity_fields, Val(tracer_index), model.clock, - Δt; - only_active_cells = only_active_surface_cells) + Δt) end return nothing From b78b04263cac700a579eb1338ddaa1b03e5c8559 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sat, 2 Dec 2023 19:10:03 -0500 Subject: [PATCH 016/138] I hope it works! --- src/ImmersedBoundaries/active_cells_map.jl | 87 ++++++++++++++++--- ...static_free_surface_boundary_tendencies.jl | 20 ++++- ...ute_hydrostatic_free_surface_tendencies.jl | 18 ++-- .../hydrostatic_free_surface_ab2_step.jl | 12 +-- .../split_explicit_free_surface_kernels.jl | 12 +-- .../compute_nonhydrostatic_tendencies.jl | 18 ++-- src/TimeSteppers/quasi_adams_bashforth_2.jl | 14 +-- src/TimeSteppers/store_tendencies.jl | 12 +-- 8 files changed, 117 insertions(+), 76 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index e9c097fda7..4538470d69 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -7,23 +7,51 @@ import Oceananigans.Utils: active_cells_work_layout, use_only_active_interior_cells using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection +using Oceananigans.DistributedComputations: DistributedGrid import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! -const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} -const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, Union{<:AbstractArray, <:NamedTuple}} +const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:DistributedGrid, <:Any, <:Any, <:Any, <:Any, <:Any, <:NamedTuple} struct InteriorMap end struct SurfaceMap end +struct WestMap end +struct EastMap end +struct SouthMap end +struct NorthMap end + +active_map(::Val{:west}) = WestMap() +active_map(::Val{:east}) = EastMap() +active_map(::Val{:south}) = SouthMap() +active_map(::Val{:north}) = NorthMap() + @inline use_only_active_surface_cells(::AbstractGrid) = nothing @inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() @inline use_only_active_surface_cells(::ActiveSurfaceIBG) = SurfaceMap() +@inline use_only_active_west_cells(::DistributedActiveCellsIBG) = WestMap() +@inline use_only_active_east_cells(::DistributedActiveCellsIBG) = EastMap() +@inline use_only_active_south_cells(::DistributedActiveCellsIBG) = SouthMap() +@inline use_only_active_nouth_cells(::DistributedActiveCellsIBG) = NorthMap() + +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) +@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) + +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) +@inline active_cells_work_layout(group, size, ::WestMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) +@inline active_cells_work_layout(group, size, ::EastMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) +@inline active_cells_work_layout(group, size, ::SouthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) +@inline active_cells_work_layout(group, size, ::NorthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) + +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) +@inline active_linear_index_to_tuple(idx, ::WestMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.west[idx]) +@inline active_linear_index_to_tuple(idx, ::EastMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.east[idx]) +@inline active_linear_index_to_tuple(idx, ::SouthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) +@inline active_linear_index_to_tuple(idx, ::NorthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) -@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) - -@inline active_linear_index_to_interior_tuple(idx, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) @inline active_linear_index_to_surface_tuple(idx, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) @@ -34,7 +62,6 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) # Create the cells map on the CPU, then switch it to the GPU if active_cells_map interior_map = active_cells_interior_map(ibg) - interior_map = arch_array(architecture(ibg), interior_map) surface_map = active_cells_surface_map(ibg) surface_map = arch_array(architecture(ibg), surface_map) else @@ -82,6 +109,7 @@ function active_cells_interior_map(ibg) # Cannot findall on the entire field because we incur on OOM errors active_indices = IndicesType[] active_indices = findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) + active_indices = arch_array(architecture(ibg), active_indices) return active_indices end @@ -126,9 +154,42 @@ function active_cells_surface_map(ibg) return smaller_indices end -# @kernel function solve_batched_tridiagonal_system_kernel!(ϕ, a, b, c, f, t, grid::ActiveSurfaceIBG, p, args, tridiagonal_direction::ZDirection) -# Nz = size(grid, 3) -# idx = @index(Global, Linear) -# i, j = active_linear_index_to_surface_tuple(idx, grid) -# solve_batched_tridiagonal_system_z!(i, j, Nz, ϕ, a, b, c, f, t, grid, p, args, tridiagonal_direction) -# end \ No newline at end of file +# In case of a `DistributedGrid` we want to have different maps depending on the +# partitioning of the domain +function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:DistributedGrid}) + active_cells_field = compute_interior_active_cells(ibg) + + N = maximum(size(ibg)) + IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 + + IndicesType = Tuple{IntType, IntType, IntType} + + # Cannot findall on the entire field because we incur on OOM errors + active_indices = IndicesType[] + active_indices = findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) + active_indices = separate_active_indices!(active_indices, ibg) + + return active_indices +end + +function separate_active_indices!(indices, ibg) + arch = architecture(ibg) + Hx, Hy, _ = halo_size(ibg) + Nx, Ny, _ = size(ibg) + Rx, Ry, _ = arch.ranks + west = Rx > 1 ? findall(idx -> idx[1] <= Hx, indices) : nothing + east = Rx > 1 ? findall(idx -> idx[1] >= Nx-Hx, indices) : nothing + south = Ry > 1 ? findall(idx -> idx[2] <= Hy, indices) : nothing + north = Ry > 1 ? findall(idx -> idx[2] <= Ny-Hy, indices) : nothing + + interior = findall(idx -> !(idx ∈ west) && !(idx ∈ east) && !(idx ∈ south) && !(idx ∈ north), indices) + + interior = arch_array(architecture(ibg), interior) + + west = west isa Nothing ? nothing : arch_array(architecture(ibg), west) + east = east isa Nothing ? nothing : arch_array(architecture(ibg), east) + south = south isa Nothing ? nothing : arch_array(architecture(ibg), south) + north = north isa Nothing ? nothing : arch_array(architecture(ibg), north) + + return (; interior, west, east, south, north) +end \ No newline at end of file diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl index ab510f7c76..b9279574c3 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl @@ -8,6 +8,8 @@ using Oceananigans.Models.NonhydrostaticModels: boundary_tendency_kernel_paramet import Oceananigans.Models: compute_boundary_tendencies! +using Oceananigans.ImmersedBoundaries: active_map, DistributedActiveCellsIBG + # We assume here that top/bottom BC are always synched (no partitioning in z) function compute_boundary_tendencies!(model::HydrostaticFreeSurfaceModel) grid = model.grid @@ -21,12 +23,28 @@ function compute_boundary_tendencies!(model::HydrostaticFreeSurfaceModel) compute_auxiliaries!(model; w_parameters, p_parameters, κ_parameters) # parameters for communicating North / South / East / West side + compute_boundary_tendency_contributions!(grid, arch, model) + + return nothing +end + +function compute_boundary_tendency_contributions!(grid, arch, model) kernel_parameters = boundary_tendency_kernel_parameters(grid, arch) - compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters) + compute_hydrostatic_free_surface_tendency_contributions!(grid, model, kernel_parameters) return nothing end +function compute_boundary_tendency_contributions!(grid::DistributedActiveCellsIBG, arch, model) + maps = grid.interior_active_cells + + for (name, map) in zip(keys(maps), maps) + if name != :interior && !isnothing(map) + compute_hydrostatic_free_surface_tendency_contributions!(model, :xyz; only_active_cells = active_map(Val(name))) + end + end +end + # w needs computing in the range - H + 1 : 0 and N - 1 : N + H - 1 function boundary_w_kernel_parameters(grid, arch) Nx, Ny, _ = size(grid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 996eaf1ad4..8ef6d8ea1e 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -14,7 +14,7 @@ import Oceananigans.Models: complete_communication_and_compute_boundary! import Oceananigans.Models: interior_tendency_kernel_parameters using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells, ActiveCellsIBG, - InteriorMap, active_linear_index_to_interior_tuple + InteriorMap, active_linear_index_to_tuple """ compute_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks) @@ -227,9 +227,9 @@ end @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end @@ -239,9 +239,9 @@ end @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end @@ -255,9 +255,9 @@ end @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end @@ -267,9 +267,9 @@ end @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) end diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index a0709d9d72..bc7dde1c31 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -37,17 +37,13 @@ end function ab2_step_velocities!(velocities, model, Δt, χ) - only_active_interior_cells = use_only_active_interior_cells(model.grid) - only_active_surface_cells = use_only_active_surface_cells(model.grid) - for (i, name) in enumerate((:u, :v)) Gⁿ = model.timestepper.Gⁿ[name] G⁻ = model.timestepper.G⁻[name] velocity_field = model.velocities[name] launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻, model.grid; - only_active_cells = only_active_interior_cells) + ab2_step_field!, velocity_field, Δt, χ, Gⁿ, G⁻, model.grid) # TODO: let next implicit solve depend on previous solve + explicit velocity step # Need to distinguish between solver events and tendency calculation events. @@ -74,9 +70,6 @@ ab2_step_tracers!(::EmptyNamedTuple, model, Δt, χ) = nothing function ab2_step_tracers!(tracers, model, Δt, χ) - only_active_interior_cells = use_only_active_interior_cells(model.grid) - only_active_surface_cells = use_only_active_surface_cells(model.grid) - # Tracer update kernels for (tracer_index, tracer_name) in enumerate(propertynames(tracers)) Gⁿ = model.timestepper.Gⁿ[tracer_name] @@ -85,8 +78,7 @@ function ab2_step_tracers!(tracers, model, Δt, χ) closure = model.closure launch!(model.architecture, model.grid, :xyz, - ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻, model.grid; - only_active_cells = only_active_interior_cells) + ab2_step_field!, tracer_field, Δt, χ, Gⁿ, G⁻, model.grid) implicit_step!(tracer_field, model.timestepper.implicit_solver, diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 754ffa362b..e068a9874f 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -310,15 +310,6 @@ end end end -@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid::ActiveCellsIBG) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) - @inbounds begin - u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] - v[i, j, k] = v[i, j, k] + (V̅[i, j] - V[i, j]) / Hᶜᶠ[i, j] - end -end - # may need to do Val(Nk) since it may not be known at compile. Also figure out where to put H function barotropic_split_explicit_corrector!(u, v, free_surface, grid) sefs = free_surface.state @@ -332,8 +323,7 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid) # add in "good" barotropic mode launch!(arch, grid, :xyz, barotropic_split_explicit_corrector_kernel!, - u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid; - only_active_cells = use_only_active_interior_cells(grid)) + u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) return nothing end diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl index 4e47a7b472..6f9380e619 100644 --- a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl +++ b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl @@ -4,7 +4,7 @@ using Oceananigans.Utils: work_layout using Oceananigans.Models: complete_communication_and_compute_boundary!, interior_tendency_kernel_parameters using Oceananigans.ImmersedBoundaries: use_only_active_interior_cells, ActiveCellsIBG, - InteriorMap, active_linear_index_to_interior_tuple + InteriorMap, active_linear_index_to_tuple import Oceananigans.TimeSteppers: compute_tendencies! @@ -138,9 +138,9 @@ end @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gu!(Gu, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gu!(Gu, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) end @@ -150,9 +150,9 @@ end @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gv!(Gv, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gv!(Gv, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) end @@ -162,9 +162,9 @@ end @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gw!(Gw, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gw!(Gw, grid::ActiveCellsIBG, map, ::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) end @@ -178,9 +178,9 @@ end @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_Gc!(Gc, grid::ActiveCellsIBG, ::InteriorMap, args) +@kernel function compute_Gc!(Gc, grid::ActiveCellsIBG, map::InteriorMap, args) idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) + i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) end diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index f8348777f7..f63dcf817f 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -1,7 +1,7 @@ using Oceananigans.Fields: FunctionField, location using Oceananigans.TurbulenceClosures: implicit_step! using Oceananigans.Utils: @apply_regionally, apply_regionally! -using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, active_linear_index_to_interior_tuple +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, active_linear_index_to_tuple mutable struct QuasiAdamsBashforth2TimeStepper{FT, GT, IT} <: AbstractTimeStepper χ :: FT @@ -159,16 +159,4 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) end -@kernel function ab2_step_field!(u, Δt, χ, Gⁿ, G⁻, grid::ActiveCellsIBG) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) - - FT = eltype(χ) - one_point_five = convert(FT, 1.5) - oh_point_five = convert(FT, 0.5) - - @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) -end - @kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing -@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid::ActiveCellsIBG) = nothing diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index ea700ef1e2..b4a117e5b8 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -9,23 +9,15 @@ using Oceananigans.Utils: launch! @inbounds G⁻[i, j, k] = G⁰[i, j, k] end -""" Store source terms for `u`, `v`, and `w`. """ -@kernel function store_field_tendencies!(G⁻, grid::ActiveCellsIBG, G⁰) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_interior_tuple(idx, grid) - @inbounds G⁻[i, j, k] = G⁰[i, j, k] -end - """ Store previous source terms before updating them. """ -function store_tendencies!(model; only_active_cells = use_only_active_interior_cells(model.grid)) +function store_tendencies!(model) model_fields = prognostic_fields(model) for field_name in keys(model_fields) launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], model.grid, - model.timestepper.Gⁿ[field_name]; - only_active_cells) + model.timestepper.Gⁿ[field_name]) end return nothing From 11e81431ff7fa8639c7a14de5568abf9c74e85dd Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sat, 2 Dec 2023 19:17:07 -0500 Subject: [PATCH 017/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 4538470d69..65a0a48947 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -180,7 +180,7 @@ function separate_active_indices!(indices, ibg) west = Rx > 1 ? findall(idx -> idx[1] <= Hx, indices) : nothing east = Rx > 1 ? findall(idx -> idx[1] >= Nx-Hx, indices) : nothing south = Ry > 1 ? findall(idx -> idx[2] <= Hy, indices) : nothing - north = Ry > 1 ? findall(idx -> idx[2] <= Ny-Hy, indices) : nothing + north = Ry > 1 ? findall(idx -> idx[2] >= Ny-Hy, indices) : nothing interior = findall(idx -> !(idx ∈ west) && !(idx ∈ east) && !(idx ∈ south) && !(idx ∈ north), indices) From 2a2f77280a3132241224280e2343e48f1422257e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sat, 2 Dec 2023 19:24:22 -0500 Subject: [PATCH 018/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 21 +++++++++++-------- ...static_free_surface_boundary_tendencies.jl | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 65a0a48947..0fed88b291 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -177,19 +177,22 @@ function separate_active_indices!(indices, ibg) Hx, Hy, _ = halo_size(ibg) Nx, Ny, _ = size(ibg) Rx, Ry, _ = arch.ranks - west = Rx > 1 ? findall(idx -> idx[1] <= Hx, indices) : nothing - east = Rx > 1 ? findall(idx -> idx[1] >= Nx-Hx, indices) : nothing - south = Ry > 1 ? findall(idx -> idx[2] <= Hy, indices) : nothing - north = Ry > 1 ? findall(idx -> idx[2] >= Ny-Hy, indices) : nothing + west = Rx > 1 ? findall(idx -> idx[1] <= Hx, indices) : [] + east = Rx > 1 ? findall(idx -> idx[1] >= Nx-Hx, indices) : [] + south = Ry > 1 ? findall(idx -> idx[2] <= Hy, indices) : [] + north = Ry > 1 ? findall(idx -> idx[2] >= Ny-Hy, indices) : [] - interior = findall(idx -> !(idx ∈ west) && !(idx ∈ east) && !(idx ∈ south) && !(idx ∈ north), indices) + interior = findall(idx -> !(idx ∈ west) && + !(idx ∈ east) && + !(idx ∈ south) && + !(idx ∈ north), indices) interior = arch_array(architecture(ibg), interior) - west = west isa Nothing ? nothing : arch_array(architecture(ibg), west) - east = east isa Nothing ? nothing : arch_array(architecture(ibg), east) - south = south isa Nothing ? nothing : arch_array(architecture(ibg), south) - north = north isa Nothing ? nothing : arch_array(architecture(ibg), north) + west = arch_array(architecture(ibg), west) + east = arch_array(architecture(ibg), east) + south = arch_array(architecture(ibg), south) + north = arch_array(architecture(ibg), north) return (; interior, west, east, south, north) end \ No newline at end of file diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl index b9279574c3..3864607641 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl @@ -30,7 +30,7 @@ end function compute_boundary_tendency_contributions!(grid, arch, model) kernel_parameters = boundary_tendency_kernel_parameters(grid, arch) - compute_hydrostatic_free_surface_tendency_contributions!(grid, model, kernel_parameters) + compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters) return nothing end @@ -39,7 +39,7 @@ function compute_boundary_tendency_contributions!(grid::DistributedActiveCellsIB maps = grid.interior_active_cells for (name, map) in zip(keys(maps), maps) - if name != :interior && !isnothing(map) + if name != :interior && !isempy(map) compute_hydrostatic_free_surface_tendency_contributions!(model, :xyz; only_active_cells = active_map(Val(name))) end end From d91cd93d524393f07b5f0ce2426a2af6fcb76697 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sat, 2 Dec 2023 20:45:20 -0500 Subject: [PATCH 019/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 0fed88b291..970b31d70b 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -13,7 +13,7 @@ import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, Union{<:AbstractArray, <:NamedTuple}} const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} -const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:DistributedGrid, <:Any, <:Any, <:Any, <:Any, <:Any, <:NamedTuple} +const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} struct InteriorMap end struct SurfaceMap end @@ -40,10 +40,10 @@ active_map(::Val{:north}) = NorthMap() @inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) @inline active_cells_work_layout(group, size, ::InteriorMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) -@inline active_cells_work_layout(group, size, ::WestMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) -@inline active_cells_work_layout(group, size, ::EastMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) -@inline active_cells_work_layout(group, size, ::SouthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) -@inline active_cells_work_layout(group, size, ::NorthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) +@inline active_cells_work_layout(group, size, ::WestMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) +@inline active_cells_work_layout(group, size, ::EastMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) +@inline active_cells_work_layout(group, size, ::SouthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) +@inline active_cells_work_layout(group, size, ::NorthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) @inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) @inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) @@ -75,6 +75,9 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) surface_map) end +with_halo(halo, ibg::ActiveCellsIBG) = + ImmersedBoundaryGrid(with_halo(halo, ibg.underlying_grid), ibg.immersed_boundary; active_cells_map = true) + @inline active_cell(i, j, k, ibg) = !immersed_cell(i, j, k, ibg) @inline active_column(i, j, k, grid, column) = column[i, j, k] != 0 From 8eaf808bc7fe38b8db99b552111975de95f20740 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sat, 2 Dec 2023 21:18:20 -0500 Subject: [PATCH 020/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 970b31d70b..56e3e1989d 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -28,13 +28,10 @@ active_map(::Val{:east}) = EastMap() active_map(::Val{:south}) = SouthMap() active_map(::Val{:north}) = NorthMap() -@inline use_only_active_surface_cells(::AbstractGrid) = nothing -@inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() -@inline use_only_active_surface_cells(::ActiveSurfaceIBG) = SurfaceMap() -@inline use_only_active_west_cells(::DistributedActiveCellsIBG) = WestMap() -@inline use_only_active_east_cells(::DistributedActiveCellsIBG) = EastMap() -@inline use_only_active_south_cells(::DistributedActiveCellsIBG) = SouthMap() -@inline use_only_active_nouth_cells(::DistributedActiveCellsIBG) = NorthMap() +@inline use_only_active_surface_cells(::AbstractGrid) = nothing +@inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() +@inline use_only_active_surface_cells(::ActiveSurfaceIBG) = SurfaceMap() +@inline use_only_active_interior_cells(::DistributedActiveCellsIBG) = InteriorMap() @inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) @inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) @@ -159,7 +156,7 @@ end # In case of a `DistributedGrid` we want to have different maps depending on the # partitioning of the domain -function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:DistributedGrid}) +function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) active_cells_field = compute_interior_active_cells(ibg) N = maximum(size(ibg)) From 023e4045b854fc57935707ed58cb917297cb1635 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Dec 2023 09:49:11 -0500 Subject: [PATCH 021/138] couple of bugfixes --- .../distributed_architectures.jl | 2 +- src/ImmersedBoundaries/active_cells_map.jl | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 0f1d5e7a52..33fcfc3da8 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -209,7 +209,7 @@ function Distributed(child_architecture = CPU(); partition = Partition(MPI.Comm_size(communicator))) if !(MPI.Initialized()) - @info "MPI has not been initialized, so we are calling MPI.Init()". + @info "MPI has not been initialized, so we are calling MPI.Init()" MPI.Init() end diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 56e3e1989d..ccb03759b5 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -177,22 +177,22 @@ function separate_active_indices!(indices, ibg) Hx, Hy, _ = halo_size(ibg) Nx, Ny, _ = size(ibg) Rx, Ry, _ = arch.ranks - west = Rx > 1 ? findall(idx -> idx[1] <= Hx, indices) : [] - east = Rx > 1 ? findall(idx -> idx[1] >= Nx-Hx, indices) : [] - south = Ry > 1 ? findall(idx -> idx[2] <= Hy, indices) : [] - north = Ry > 1 ? findall(idx -> idx[2] >= Ny-Hy, indices) : [] + west = Rx > 1 ? findall(idx -> Int(idx[1]) <= Hx, indices) : Int[] + east = Rx > 1 ? findall(idx -> Int(idx[1]) >= Nx-Hx, indices) : Int[] + south = Ry > 1 ? findall(idx -> Int(idx[2]) <= Hy, indices) : Int[] + north = Ry > 1 ? findall(idx -> Int(idx[2]) >= Ny-Hy, indices) : Int[] + + west = arch_array(architecture(ibg), indices[west]) + east = arch_array(architecture(ibg), indices[east]) + south = arch_array(architecture(ibg), indices[south]) + north = arch_array(architecture(ibg), indices[north]) interior = findall(idx -> !(idx ∈ west) && !(idx ∈ east) && !(idx ∈ south) && !(idx ∈ north), indices) - interior = arch_array(architecture(ibg), interior) - - west = arch_array(architecture(ibg), west) - east = arch_array(architecture(ibg), east) - south = arch_array(architecture(ibg), south) - north = arch_array(architecture(ibg), north) + interior = arch_array(architecture(ibg), indices[interior]) return (; interior, west, east, south, north) end \ No newline at end of file From 81e70f699103a0b09d9bda24bb34991b6e5492c6 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Dec 2023 12:41:07 -0500 Subject: [PATCH 022/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index ccb03759b5..61a2b10c10 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -182,17 +182,21 @@ function separate_active_indices!(indices, ibg) south = Ry > 1 ? findall(idx -> Int(idx[2]) <= Hy, indices) : Int[] north = Ry > 1 ? findall(idx -> Int(idx[2]) >= Ny-Hy, indices) : Int[] - west = arch_array(architecture(ibg), indices[west]) - east = arch_array(architecture(ibg), indices[east]) - south = arch_array(architecture(ibg), indices[south]) - north = arch_array(architecture(ibg), indices[north]) + west = indices[west] + east = indices[east] + south = indices[south] + north = indices[north] interior = findall(idx -> !(idx ∈ west) && !(idx ∈ east) && !(idx ∈ south) && !(idx ∈ north), indices) - interior = arch_array(architecture(ibg), indices[interior]) - + interior = arch_array(architecture(ibg), indices[interior]) + west = arch_array(architecture(ibg), west ) + east = arch_array(architecture(ibg), east ) + south = arch_array(architecture(ibg), south) + north = arch_array(architecture(ibg), north) + return (; interior, west, east, south, north) end \ No newline at end of file From 7ab646ba31be6fc2f78d7a0430f1d543e66d42d7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Dec 2023 16:29:58 -0500 Subject: [PATCH 023/138] bugfixes --- src/ImmersedBoundaries/active_cells_map.jl | 111 +++++++++--------- ...static_free_surface_boundary_tendencies.jl | 7 +- .../split_explicit_free_surface_kernels.jl | 12 +- .../distributed_hydrostatic_turbulence.jl | 23 ++-- 4 files changed, 79 insertions(+), 74 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 61a2b10c10..a10efa0a7e 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -1,4 +1,5 @@ using Oceananigans +using Oceananigans.Utils using Oceananigans.Grids: AbstractGrid using KernelAbstractions: @kernel, @index @@ -11,9 +12,10 @@ using Oceananigans.DistributedComputations: DistributedGrid import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! -const ActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, Union{<:AbstractArray, <:NamedTuple}} const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} +const SerialActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const ActiveCellsIBG = Union{DistributedActiveCellsIBG, SerialActiveCellsIBG} struct InteriorMap end struct SurfaceMap end @@ -49,7 +51,7 @@ active_map(::Val{:north}) = NorthMap() @inline active_linear_index_to_tuple(idx, ::SouthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) @inline active_linear_index_to_tuple(idx, ::NorthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) -@inline active_linear_index_to_surface_tuple(idx, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) +@inline active_linear_index_to_tuple(idx, ::SurfaceMap, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) @@ -78,10 +80,15 @@ with_halo(halo, ibg::ActiveCellsIBG) = @inline active_cell(i, j, k, ibg) = !immersed_cell(i, j, k, ibg) @inline active_column(i, j, k, grid, column) = column[i, j, k] != 0 -function compute_interior_active_cells(ibg) - is_immersed_operation = KernelFunctionOperation{Center, Center, Center}(active_cell, ibg) +@kernel function _set_active_indices!(active_cells_field, grid) + i, j, k = @index(Global, NTuple) + @inbounds active_cells_field[i, j, k] = active_cell(i, j, k, grid) +end + +function compute_interior_active_cells(ibg; parameters = :xyz) active_cells_field = Field{Center, Center, Center}(ibg, Bool) - set!(active_cells_field, is_immersed_operation) + fill!(active_cells_field, false) + launch!(architecture(ibg), ibg, parameters, _set_active_indices!, active_cells_field, ibg) return active_cells_field end @@ -98,8 +105,8 @@ const MAXUInt8 = 2^8 - 1 const MAXUInt16 = 2^16 - 1 const MAXUInt32 = 2^32 - 1 -function active_cells_interior_map(ibg) - active_cells_field = compute_interior_active_cells(ibg) +function active_interior_indices(ibg; parameters = :xyz) + active_cells_field = compute_interior_active_cells(ibg; parameters) N = maximum(size(ibg)) IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 @@ -137,6 +144,49 @@ end @inline add_3rd_index(t::Tuple, k) = (t[1], t[2], k) +active_cells_interior_map(ibg) = active_interior_indices(ibg; parameters = :xyz) + +# In case of a `DistributedGrid` we want to have different maps depending on the +# partitioning of the domain +function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) + + arch = architecture(ibg) + Rx, Ry, _ = arch.ranks + Tx, Ty, _ = topology(ibg) + Nx, Ny, Nz = size(ibg) + Hx, Hy, _ = halo_size(ibg) + + Sx = (Hx, Ny, Nz) + Sy = (Nx, Hy, Nz) + + Oᴸ = (0, 0, 0) + Oxᴿ = (Nx-Hx, 0, 0) + Oyᴿ = (0, Ny-Hy, 0) + + sizes = (Sx, Sy, Sx, Sy) + offs = (Oᴸ, Oᴸ, Oxᴿ, Oyᴿ) + + include_west = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == RightConnected) + include_east = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == LeftConnected) + include_south = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == RightConnected) + include_north = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == LeftConnected) + + west = include_west ? active_interior_indices(ibg; parameters = KernelParameters(Sx, Oᴸ)) : nothing + east = include_east ? active_interior_indices(ibg; parameters = KernelParameters(Sx, Oxᴿ)) : nothing + south = include_south ? active_interior_indices(ibg; parameters = KernelParameters(Sy, Oᴸ)) : nothing + north = include_north ? active_interior_indices(ibg; parameters = KernelParameters(Sy, Oyᴿ)) : nothing + + nx = Rx == 1 ? Nx : (Tx == RightConnected || Tx == LeftConnected ? Nx - Hx : Nx - 2Hx) + ny = Ry == 1 ? Ny : (Ty == RightConnected || Ty == LeftConnected ? Ny - Hy : Ny - 2Hy) + + ox = Rx == 1 || Tx == RightConnected ? 0 : Hx + oy = Ry == 1 || Ty == RightConnected ? 0 : Hy + + interior = active_interior_indices(ibg; parameters = KernelParameters((nx, ny, Nz), (ox, oy, 0))) + + return (; interior, west, east, south, north) +end + # If we eventually want to perform also barotropic step, `w` computation and `p` # computation only on active `columns` function active_cells_surface_map(ibg) @@ -153,50 +203,3 @@ function active_cells_surface_map(ibg) return smaller_indices end - -# In case of a `DistributedGrid` we want to have different maps depending on the -# partitioning of the domain -function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) - active_cells_field = compute_interior_active_cells(ibg) - - N = maximum(size(ibg)) - IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 - - IndicesType = Tuple{IntType, IntType, IntType} - - # Cannot findall on the entire field because we incur on OOM errors - active_indices = IndicesType[] - active_indices = findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) - active_indices = separate_active_indices!(active_indices, ibg) - - return active_indices -end - -function separate_active_indices!(indices, ibg) - arch = architecture(ibg) - Hx, Hy, _ = halo_size(ibg) - Nx, Ny, _ = size(ibg) - Rx, Ry, _ = arch.ranks - west = Rx > 1 ? findall(idx -> Int(idx[1]) <= Hx, indices) : Int[] - east = Rx > 1 ? findall(idx -> Int(idx[1]) >= Nx-Hx, indices) : Int[] - south = Ry > 1 ? findall(idx -> Int(idx[2]) <= Hy, indices) : Int[] - north = Ry > 1 ? findall(idx -> Int(idx[2]) >= Ny-Hy, indices) : Int[] - - west = indices[west] - east = indices[east] - south = indices[south] - north = indices[north] - - interior = findall(idx -> !(idx ∈ west) && - !(idx ∈ east) && - !(idx ∈ south) && - !(idx ∈ north), indices) - - interior = arch_array(architecture(ibg), indices[interior]) - west = arch_array(architecture(ibg), west ) - east = arch_array(architecture(ibg), east ) - south = arch_array(architecture(ibg), south) - north = arch_array(architecture(ibg), north) - - return (; interior, west, east, south, north) -end \ No newline at end of file diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl index 3864607641..eca6381402 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl @@ -31,7 +31,6 @@ end function compute_boundary_tendency_contributions!(grid, arch, model) kernel_parameters = boundary_tendency_kernel_parameters(grid, arch) compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters) - return nothing end @@ -39,10 +38,12 @@ function compute_boundary_tendency_contributions!(grid::DistributedActiveCellsIB maps = grid.interior_active_cells for (name, map) in zip(keys(maps), maps) - if name != :interior && !isempy(map) - compute_hydrostatic_free_surface_tendency_contributions!(model, :xyz; only_active_cells = active_map(Val(name))) + if name != :interior && !isnothing(map) + compute_hydrostatic_free_surface_tendency_contributions!(model, tuple(:xyz); only_active_cells = active_map(Val(name))) end end + + return nothing end # w needs computing in the range - H + 1 : 0 and N - 1 : N + H - 1 diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index e068a9874f..db331408c7 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -6,9 +6,9 @@ using Oceananigans.AbstractOperations: Δz using Oceananigans.BoundaryConditions using Oceananigans.Operators using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node -using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f +using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells -using Oceananigans.ImmersedBoundaries: active_linear_index_to_surface_tuple, ActiveCellsIBG, ActiveSurfaceIBG +using Oceananigans.ImmersedBoundaries: active_linear_index_to_tuple, ActiveCellsIBG, ActiveSurfaceIBG # constants for AB3 time stepping scheme (from https://doi.org/10.1016/j.ocemod.2004.08.002) const β = 0.281105 @@ -150,7 +150,7 @@ end Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) idx = @index(Global, Linear) - i, j = active_linear_index_to_surface_tuple(idx, grid) + i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) end @@ -186,7 +186,7 @@ end Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) idx = @index(Global, Linear) - i, j = active_linear_index_to_surface_tuple(idx, grid) + i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, @@ -262,7 +262,7 @@ end # u_Δz = u * Δz @kernel function _barotropic_mode_kernel!(U, V, grid::ActiveSurfaceIBG, u, v) idx = @index(Global, Linear) - i, j = active_linear_index_to_surface_tuple(idx, grid) + i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) # hand unroll first loop @inbounds U[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] @@ -424,7 +424,7 @@ end # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid::ActiveSurfaceIBG, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) idx = @index(Global, Linear) - i, j = active_linear_index_to_surface_tuple(idx, grid) + i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) # hand unroll first loop @inbounds Gᵁ[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) diff --git a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl index 5fbe1c766a..8ba70ba83f 100644 --- a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl +++ b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl @@ -6,6 +6,7 @@ using Statistics using Oceananigans.BoundaryConditions using Oceananigans.DistributedComputations using Random +using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, use_only_active_interior_cells # Run with # @@ -13,10 +14,14 @@ using Random # mpiexec -n 4 julia --project distributed_hydrostatic_turbulence.jl # ``` -function run_simulation(nx, ny, arch, topo) - grid = RectilinearGrid(arch; topology=topo, size=(nx, ny, 1), extent=(4π, 4π, 0.5), halo=(7, 7, 7)) +function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded)) + grid = RectilinearGrid(arch; topology, size = (Nx, Ny, 1), extent=(4π, 4π, 0.5), halo=(7, 7, 7)) + bottom(x, y) = (x > π && x < 3π/2 && y > π/2 && y < 3π/2) ? 1.0 : - grid.Lz - 1.0 - grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom)) + grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom); active_cells_map = true) + + @show grid isa ActiveCellsIBG + @show use_only_active_interior_cells(grid) model = HydrostaticFreeSurfaceModel(; grid, momentum_advection = VectorInvariant(vorticity_scheme=WENO(order=9)), @@ -60,17 +65,13 @@ function run_simulation(nx, ny, arch, topo) MPI.Barrier(arch.communicator) end -topo = (Periodic, Periodic, Bounded) +Nx = 128 +Ny = 128 -# Use non-uniform partitioning in x, y. -# TODO: Explain what local_index is. -nx = [90, 128-90][arch.local_index[1]] -ny = [56, 128-56][arch.local_index[2]] -@show arch.local_index -arch = Distributed(CPU(), topology = topo, ranks=(2, 2, 1)) +arch = Distributed(CPU(), partition = Partition(2, 2)) # Run the simulation -run_simulation(nx, ny, arch, topo) +run_simulation(Nx, Ny, arch) # Visualize the plane # Produce a video for variable `var` From a6ab8406dd076590d505d0fbe3cec35c1f6fda7c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Dec 2023 18:38:51 -0500 Subject: [PATCH 024/138] changes --- .../compute_hydrostatic_free_surface_tendencies.jl | 8 ++++---- .../store_hydrostatic_free_surface_tendencies.jl | 6 ++---- src/TimeSteppers/store_tendencies.jl | 3 +-- .../distributed_hydrostatic_turbulence.jl | 5 ++--- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 8ef6d8ea1e..8f69e3c773 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -222,7 +222,7 @@ end ##### """ Calculate the right-hand-side of the u-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end @@ -234,7 +234,7 @@ end end """ Calculate the right-hand-side of the v-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end @@ -250,7 +250,7 @@ end ##### """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end @@ -262,7 +262,7 @@ end end """ Calculate the right-hand-side of the subgrid scale energy equation. """ -@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, interior_map, args) +@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) i, j, k = @index(Global, NTuple) @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) end diff --git a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl index 24d26c213c..7ae1cda0b5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl @@ -28,7 +28,7 @@ function store_free_surface_tendency!(::ExplicitFreeSurface, model) end """ Store previous source terms before updating them. """ -function store_tendencies!(model::HydrostaticFreeSurfaceModel; only_active_cells = use_only_active_interior_cells(model.grid)) +function store_tendencies!(model::HydrostaticFreeSurfaceModel) prognostic_field_names = keys(prognostic_fields(model)) three_dimensional_prognostic_field_names = filter(name -> name != :η, prognostic_field_names) @@ -37,9 +37,7 @@ function store_tendencies!(model::HydrostaticFreeSurfaceModel; only_active_cells launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], - model.grid, - model.timestepper.Gⁿ[field_name]; - only_active_cells) + model.timestepper.Gⁿ[field_name]) end diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl index b4a117e5b8..e2f1b07c7e 100644 --- a/src/TimeSteppers/store_tendencies.jl +++ b/src/TimeSteppers/store_tendencies.jl @@ -4,7 +4,7 @@ using Oceananigans.ImmersedBoundaries: ActiveCellsIBG using Oceananigans.Utils: launch! """ Store source terms for `u`, `v`, and `w`. """ -@kernel function store_field_tendencies!(G⁻, grid, G⁰) +@kernel function store_field_tendencies!(G⁻, G⁰) i, j, k = @index(Global, NTuple) @inbounds G⁻[i, j, k] = G⁰[i, j, k] end @@ -16,7 +16,6 @@ function store_tendencies!(model) for field_name in keys(model_fields) launch!(model.architecture, model.grid, :xyz, store_field_tendencies!, model.timestepper.G⁻[field_name], - model.grid, model.timestepper.Gⁿ[field_name]) end diff --git a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl index 8ba70ba83f..fdec0fcc83 100644 --- a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl +++ b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl @@ -43,9 +43,9 @@ function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded)) set!(c, mask) u, v, _ = model.velocities - ζ = VerticalVorticityField(model) + # ζ = VerticalVorticityField(model) η = model.free_surface.η - outputs = merge(model.velocities, model.tracers, (; ζ, η)) + outputs = merge(model.velocities, model.tracers) progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time), Δt: $(sim.Δt)" simulation = Simulation(model, Δt=0.02, stop_time=100.0) @@ -110,7 +110,6 @@ try if MPI.Comm_rank(MPI.COMM_WORLD) == 0 visualize_simulation("u") visualize_simulation("v") - visualize_simulation("ζ") visualize_simulation("c") end catch err From 6c96131a9a2861a844a319de795542f6cddf5a29 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Dec 2023 19:22:03 -0500 Subject: [PATCH 025/138] try like this --- .../compute_hydrostatic_free_surface_boundary_tendencies.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl index eca6381402..767454abc1 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_boundary_tendencies.jl @@ -56,8 +56,8 @@ function boundary_w_kernel_parameters(grid, arch) # Offsets in tangential direction are == -1 to # cover the required corners - Oxᴸ = (-Hx, -1) - Oyᴸ = (-1, -Hy) + Oxᴸ = (-Hx+1, -1) + Oyᴸ = (-1, -Hy+1) Oxᴿ = (Nx-1, -1) Oyᴿ = (-1, Ny-1) From 805790933abf4e93c9ad4b56dd7be71ce644b53b Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:22:31 -0500 Subject: [PATCH 026/138] some tests... --- src/Grids/grid_generation.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Grids/grid_generation.jl b/src/Grids/grid_generation.jl index b1480aa1f3..f79a2829dc 100644 --- a/src/Grids/grid_generation.jl +++ b/src/Grids/grid_generation.jl @@ -13,6 +13,7 @@ get_face_node(coord::Function, i) = coord(i) get_face_node(coord::AbstractVector, i) = CUDA.@allowscalar coord[i] const AT = AbstractTopology + lower_exterior_Δcoordᶠ(::AT, Fi, Hcoord) = [Fi[end - Hcoord + i] - Fi[end - Hcoord + i - 1] for i = 1:Hcoord] lower_exterior_Δcoordᶠ(::BoundedTopology, Fi, Hcoord) = [Fi[2] - Fi[1] for _ = 1:Hcoord] @@ -34,6 +35,8 @@ function generate_coordinate(FT, topo::AT, N, H, node_generator, coordinate_name # Ensure correct type for F and derived quantities interior_face_nodes = zeros(FT, N+1) + @show typeof(node_generator), size(node_generator), coordinate_name + # Use the user-supplied "generator" to build the interior nodes for idx = 1:N+1 interior_face_nodes[idx] = get_face_node(node_generator, idx) From 183ea906347465d9053becdb17fb9ef335932c6d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:44:03 -0500 Subject: [PATCH 027/138] show the coordinate --- src/DistributedComputations/distributed_grids.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 27064906bd..7d94aa3896 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -130,6 +130,8 @@ function LatitudeLongitudeGrid(arch::Distributed, φl = partition(latitude, nφ, arch, 2) zl = partition(z, nz, arch, 3) + @show Base.size(zl), Base.size(z) + # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, # it is stretched if being passed is a function or vector (as for the VerticallyStretchedRectilinearGrid) From 05593e21aad1b2757424758511e2f1846561c835 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:52:20 -0500 Subject: [PATCH 028/138] bugfix --- src/DistributedComputations/distributed_grids.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 7d94aa3896..b15d61f07f 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -126,11 +126,9 @@ function LatitudeLongitudeGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - λl = partition(longitude, nλ, arch, 1) - φl = partition(latitude, nφ, arch, 2) - zl = partition(z, nz, arch, 3) - - @show Base.size(zl), Base.size(z) + λl = partition(longitude, nλ, arch, 1) + φl = partition(latitude, nφ, arch, 2) + zl = partition(z, nz+1, arch, 3) # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, From 44dbee0062959acce7d90559f406e8df844c26c8 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:59:14 -0500 Subject: [PATCH 029/138] bugfix --- src/DistributedComputations/distributed_grids.jl | 6 +++--- src/DistributedComputations/partition_assemble.jl | 7 ++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index b15d61f07f..27064906bd 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -126,9 +126,9 @@ function LatitudeLongitudeGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - λl = partition(longitude, nλ, arch, 1) - φl = partition(latitude, nφ, arch, 2) - zl = partition(z, nz+1, arch, 3) + λl = partition(longitude, nλ, arch, 1) + φl = partition(latitude, nφ, arch, 2) + zl = partition(z, nz, arch, 3) # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index 1010c0f6bf..0e96374955 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -41,7 +41,12 @@ end function partition(c::AbstractVector, n, arch, idx) nl = concatenate_local_sizes(n, arch, idx) r = arch.local_index[idx] - return c[1 + sum(nl[1:r-1]) : sum(nl[1:r])] + # Allow for Face values + if r == arch.ranks[idx] + return c[1 + sum(nl[1:r-1]) : end] + else + return c[1 + sum(nl[1:r-1]) : sum(nl[1:r])] + end end function partition(c::Tuple, n, arch, idx) From 8c81e15ff18e74b35c1301d5165826fe7e5d200f Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:03:24 -0500 Subject: [PATCH 030/138] test this hypothesis --- src/ImmersedBoundaries/active_cells_map.jl | 21 ++++++++----------- ...ute_hydrostatic_free_surface_tendencies.jl | 2 ++ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index a10efa0a7e..92a8ca6517 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -156,25 +156,22 @@ function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any Nx, Ny, Nz = size(ibg) Hx, Hy, _ = halo_size(ibg) - Sx = (Hx, Ny, Nz) - Sy = (Nx, Hy, Nz) + x_boundary = (Hx, Ny, Nz) + y_boundary = (Nx, Hy, Nz) - Oᴸ = (0, 0, 0) - Oxᴿ = (Nx-Hx, 0, 0) - Oyᴿ = (0, Ny-Hy, 0) - - sizes = (Sx, Sy, Sx, Sy) - offs = (Oᴸ, Oᴸ, Oxᴿ, Oyᴿ) + left_offsets = (0, 0, 0) + right_x_offsets = (Nx-Hx, 0, 0) + right_y_offsets = (0, Ny-Hy, 0) include_west = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == RightConnected) include_east = !isa(ibg, XFlatGrid) && (Rx != 1) && !(Tx == LeftConnected) include_south = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == RightConnected) include_north = !isa(ibg, YFlatGrid) && (Ry != 1) && !(Ty == LeftConnected) - west = include_west ? active_interior_indices(ibg; parameters = KernelParameters(Sx, Oᴸ)) : nothing - east = include_east ? active_interior_indices(ibg; parameters = KernelParameters(Sx, Oxᴿ)) : nothing - south = include_south ? active_interior_indices(ibg; parameters = KernelParameters(Sy, Oᴸ)) : nothing - north = include_north ? active_interior_indices(ibg; parameters = KernelParameters(Sy, Oyᴿ)) : nothing + west = include_west ? active_interior_indices(ibg; parameters = KernelParameters(x_boundary, left_offsets)) : nothing + east = include_east ? active_interior_indices(ibg; parameters = KernelParameters(x_boundary, right_x_offsets)) : nothing + south = include_south ? active_interior_indices(ibg; parameters = KernelParameters(y_boundary, left_offsets)) : nothing + north = include_north ? active_interior_indices(ibg; parameters = KernelParameters(y_boundary, right_y_offsets)) : nothing nx = Rx == 1 ? Nx : (Tx == RightConnected || Tx == LeftConnected ? Nx - Hx : Nx - 2Hx) ny = Ry == 1 ? Ny : (Ty == RightConnected || Ty == LeftConnected ? Ny - Hy : Ny - 2Hy) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 8f69e3c773..2403aaf5b6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -223,11 +223,13 @@ end """ Calculate the right-hand-side of the u-velocity equation. """ @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) + @show "I am inside the non-active kernel" i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) + @show "I am inside the active kernel" idx = @index(Global, Linear) i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) From 3e88fb4f5b5ddc6d1542bdb63030ee981ea45a50 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:38:29 -0500 Subject: [PATCH 031/138] another test --- src/ImmersedBoundaries/active_cells_map.jl | 7 +++++-- .../compute_hydrostatic_free_surface_tendencies.jl | 5 +++-- src/Utils/kernel_launching.jl | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 92a8ca6517..008c155b8f 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -5,7 +5,8 @@ using Oceananigans.Grids: AbstractGrid using KernelAbstractions: @kernel, @index import Oceananigans.Utils: active_cells_work_layout, - use_only_active_interior_cells + use_only_active_interior_cells, + use_only_active_surface_cells using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection using Oceananigans.DistributedComputations: DistributedGrid @@ -31,8 +32,10 @@ active_map(::Val{:south}) = SouthMap() active_map(::Val{:north}) = NorthMap() @inline use_only_active_surface_cells(::AbstractGrid) = nothing -@inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() @inline use_only_active_surface_cells(::ActiveSurfaceIBG) = SurfaceMap() + +@inline use_only_active_interior_cells(::AbstractGrid) = nothing +@inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() @inline use_only_active_interior_cells(::DistributedActiveCellsIBG) = InteriorMap() @inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 2403aaf5b6..cd1726e7a7 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -181,6 +181,9 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para u_kernel_args = tuple(start_momentum_kernel_args..., u_immersed_bc, end_momentum_kernel_args...) v_kernel_args = tuple(start_momentum_kernel_args..., v_immersed_bc, end_momentum_kernel_args...) + @show grid isa ActiveCellsIBG + @show only_active_cells + for parameters in kernel_parameters launch!(arch, grid, parameters, compute_hydrostatic_free_surface_Gu!, model.timestepper.Gⁿ.u, grid, @@ -223,13 +226,11 @@ end """ Calculate the right-hand-side of the u-velocity equation. """ @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) - @show "I am inside the non-active kernel" i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) - @show "I am inside the active kernel" idx = @index(Global, Linear) i, j, k = active_linear_index_to_tuple(idx, map, grid) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index dfdb741199..1d0b744937 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -106,7 +106,6 @@ function work_layout(grid, workdims::Symbol; include_right_boundaries=false, loc end @inline active_cells_work_layout(workgroup, worksize, only_active_cells, grid) = workgroup, worksize -@inline use_only_active_interior_cells(grid) = nothing """ launch!(arch, grid, layout, kernel!, args...; kwargs...) From d74f5f518ede95aa6d72aadc32cf0f57a8116998 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:39:47 -0500 Subject: [PATCH 032/138] bugfix --- .../ri_based_vertical_diffusivity.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl index 16dc985252..acc96fd408 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl @@ -3,7 +3,6 @@ using Oceananigans.BuoyancyModels: ∂z_b using Oceananigans.Operators using Oceananigans.Grids: inactive_node using Oceananigans.Operators: ℑzᵃᵃᶜ -using Oceananigans.Utils: use_only_active_interior_cells struct RiBasedVerticalDiffusivity{TD, FT, R} <: AbstractScalarDiffusivity{TD, VerticalFormulation, 1} ν₀ :: FT From 25a1dbb193e571fa24e74b3f421e01b9742196a7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:40:42 -0500 Subject: [PATCH 033/138] other bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 008c155b8f..2bfd2bc627 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -4,9 +4,7 @@ using Oceananigans.Grids: AbstractGrid using KernelAbstractions: @kernel, @index -import Oceananigans.Utils: active_cells_work_layout, - use_only_active_interior_cells, - use_only_active_surface_cells +import Oceananigans.Utils: active_cells_work_layout using Oceananigans.Solvers: solve_batched_tridiagonal_system_z!, ZDirection using Oceananigans.DistributedComputations: DistributedGrid From 188eedc667db35db1f6ed867b5ce60b438effc40 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:50:52 -0500 Subject: [PATCH 034/138] now we'll see... --- ...ute_hydrostatic_free_surface_tendencies.jl | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index cd1726e7a7..e7fa777da1 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -183,6 +183,7 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para @show grid isa ActiveCellsIBG @show only_active_cells + @show size(grid.interior_active_cells.interior) for parameters in kernel_parameters launch!(arch, grid, parameters, @@ -224,11 +225,11 @@ end ##### Tendency calculators for u, v ##### -""" Calculate the right-hand-side of the u-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) - i, j, k = @index(Global, NTuple) - @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) -end +# """ Calculate the right-hand-side of the u-velocity equation. """ +# @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) +# i, j, k = @index(Global, NTuple) +# @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) +# end @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -236,11 +237,11 @@ end @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end -""" Calculate the right-hand-side of the v-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) - i, j, k = @index(Global, NTuple) - @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) -end +# """ Calculate the right-hand-side of the v-velocity equation. """ +# @kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) +# i, j, k = @index(Global, NTuple) +# @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) +# end @kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -252,11 +253,11 @@ end ##### Tendency calculators for tracers ##### -""" Calculate the right-hand-side of the tracer advection-diffusion equation. """ -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) - i, j, k = @index(Global, NTuple) - @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) -end +# """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ +# @kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) +# i, j, k = @index(Global, NTuple) +# @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) +# end @kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -264,11 +265,11 @@ end @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end -""" Calculate the right-hand-side of the subgrid scale energy equation. """ -@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) - i, j, k = @index(Global, NTuple) - @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) -end +# """ Calculate the right-hand-side of the subgrid scale energy equation. """ +# @kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) +# i, j, k = @index(Global, NTuple) +# @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) +# end @kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) From 7d418e039f2dd9e9c00cd2fdd993f9dd744e3136 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:03:33 -0500 Subject: [PATCH 035/138] now it will work hopefully --- src/ImmersedBoundaries/active_cells_map.jl | 5 ++- ...ute_hydrostatic_free_surface_tendencies.jl | 40 +++++++++---------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 2bfd2bc627..16946cdceb 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -13,8 +13,9 @@ import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} -const SerialActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} -const ActiveCellsIBG = Union{DistributedActiveCellsIBG, SerialActiveCellsIBG} +const ArrayActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} +const NamedTupleActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:NamedTuple} +const ActiveCellsIBG = Union{DistributedActiveCellsIBG, ArrayActiveCellsIBG, NamedTupleActiveCellsIBG} struct InteriorMap end struct SurfaceMap end diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index e7fa777da1..94c9b868d6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -225,11 +225,11 @@ end ##### Tendency calculators for u, v ##### -# """ Calculate the right-hand-side of the u-velocity equation. """ -# @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) -# i, j, k = @index(Global, NTuple) -# @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) -# end +""" Calculate the right-hand-side of the u-velocity equation. """ +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, map, args) + i, j, k = @index(Global, NTuple) + @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) +end @kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -237,11 +237,11 @@ end @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end -# """ Calculate the right-hand-side of the v-velocity equation. """ -# @kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) -# i, j, k = @index(Global, NTuple) -# @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) -# end +""" Calculate the right-hand-side of the v-velocity equation. """ +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, map, args) + i, j, k = @index(Global, NTuple) + @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) +end @kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -253,11 +253,11 @@ end ##### Tendency calculators for tracers ##### -# """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ -# @kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) -# i, j, k = @index(Global, NTuple) -# @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) -# end +""" Calculate the right-hand-side of the tracer advection-diffusion equation. """ +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, map, args) + i, j, k = @index(Global, NTuple) + @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) +end @kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) @@ -265,11 +265,11 @@ end @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end -# """ Calculate the right-hand-side of the subgrid scale energy equation. """ -# @kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) -# i, j, k = @index(Global, NTuple) -# @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) -# end +""" Calculate the right-hand-side of the subgrid scale energy equation. """ +@kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid, map, args) + i, j, k = @index(Global, NTuple) + @inbounds Ge[i, j, k] = hydrostatic_turbulent_kinetic_energy_tendency(i, j, k, grid, args...) +end @kernel function compute_hydrostatic_free_surface_Ge!(Ge, grid::ActiveCellsIBG, map, args) idx = @index(Global, Linear) From 83b4d5b1183174028b568a44cd49e81b35e5bd3c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:08:51 -0500 Subject: [PATCH 036/138] all bugs fixed? --- src/ImmersedBoundaries/active_cells_map.jl | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 16946cdceb..546902792d 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -12,7 +12,7 @@ using Oceananigans.DistributedComputations: DistributedGrid import Oceananigans.Solvers: solve_batched_tridiagonal_system_kernel! const ActiveSurfaceIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} -const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} +const DistributedActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid, <:Any, <:NamedTuple} # Cannot be used to dispatch in kernels!!! const ArrayActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:AbstractArray} const NamedTupleActiveCellsIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:Any, <:NamedTuple} const ActiveCellsIBG = Union{DistributedActiveCellsIBG, ArrayActiveCellsIBG, NamedTupleActiveCellsIBG} @@ -37,21 +37,21 @@ active_map(::Val{:north}) = NorthMap() @inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() @inline use_only_active_interior_cells(::DistributedActiveCellsIBG) = InteriorMap() -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) -@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) - -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) -@inline active_cells_work_layout(group, size, ::WestMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) -@inline active_cells_work_layout(group, size, ::EastMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) -@inline active_cells_work_layout(group, size, ::SouthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) -@inline active_cells_work_layout(group, size, ::NorthMap, grid::DistributedActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) - -@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) -@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) -@inline active_linear_index_to_tuple(idx, ::WestMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.west[idx]) -@inline active_linear_index_to_tuple(idx, ::EastMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.east[idx]) -@inline active_linear_index_to_tuple(idx, ::SouthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) -@inline active_linear_index_to_tuple(idx, ::NorthMap, grid::DistributedActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ArrayActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) +@inline active_cells_work_layout(group, size, ::WestMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) +@inline active_cells_work_layout(group, size, ::EastMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) +@inline active_cells_work_layout(group, size, ::SouthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) +@inline active_cells_work_layout(group, size, ::NorthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) + +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ArrayActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) +@inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) +@inline active_linear_index_to_tuple(idx, ::WestMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.west[idx]) +@inline active_linear_index_to_tuple(idx, ::EastMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.east[idx]) +@inline active_linear_index_to_tuple(idx, ::SouthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) +@inline active_linear_index_to_tuple(idx, ::NorthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) + +@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) @inline active_linear_index_to_tuple(idx, ::SurfaceMap, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) From 7a0df19fa9801ff9145c69d572d7477e3e464c28 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Dec 2023 00:03:21 -0500 Subject: [PATCH 037/138] bugfix --- .../split_explicit_free_surface_kernels.jl | 29 ++----------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index db331408c7..5761e7ca29 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -145,15 +145,6 @@ using Printf free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) end -@kernel function split_explicit_free_surface_evolution_kernel!(grid::ActiveSurfaceIBG, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) - idx = @index(Global, Linear) - i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) - free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) -end - @inline function free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @@ -180,20 +171,6 @@ end timestepper ) end - -@kernel function split_explicit_barotropic_velocity_evolution_kernel!(grid::ActiveSurfaceIBG, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) - idx = @index(Global, Linear) - i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) - - velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper ) -end - @inline function velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, @@ -235,10 +212,8 @@ function split_explicit_free_surface_substep!(η, state, auxiliary, settings, we η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...; - only_active_cells = use_only_active_surface_cells(grid)) - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...; - only_active_cells = use_only_active_surface_cells(grid)) + launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...;) + launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...;) return nothing end From 26265794c9ce99d8100b92710506a4d1302d1d26 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Dec 2023 08:59:52 -0500 Subject: [PATCH 038/138] remove the shows --- src/Grids/grid_generation.jl | 2 -- .../compute_hydrostatic_free_surface_tendencies.jl | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/Grids/grid_generation.jl b/src/Grids/grid_generation.jl index f79a2829dc..98bd421d43 100644 --- a/src/Grids/grid_generation.jl +++ b/src/Grids/grid_generation.jl @@ -35,8 +35,6 @@ function generate_coordinate(FT, topo::AT, N, H, node_generator, coordinate_name # Ensure correct type for F and derived quantities interior_face_nodes = zeros(FT, N+1) - @show typeof(node_generator), size(node_generator), coordinate_name - # Use the user-supplied "generator" to build the interior nodes for idx = 1:N+1 interior_face_nodes[idx] = get_face_node(node_generator, idx) diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 94c9b868d6..c1168298e6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -180,10 +180,6 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para u_kernel_args = tuple(start_momentum_kernel_args..., u_immersed_bc, end_momentum_kernel_args...) v_kernel_args = tuple(start_momentum_kernel_args..., v_immersed_bc, end_momentum_kernel_args...) - - @show grid isa ActiveCellsIBG - @show only_active_cells - @show size(grid.interior_active_cells.interior) for parameters in kernel_parameters launch!(arch, grid, parameters, From 95e90e7226b24cd43fc5420c23c05bf13f3b2c3e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Dec 2023 15:18:38 -0500 Subject: [PATCH 039/138] unroll the loop --- .../split_explicit_free_surface_kernels.jl | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 5761e7ca29..6bdb90369b 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -138,8 +138,6 @@ using Oceananigans.DistributedComputations: Distributed using Printf @kernel function split_explicit_free_surface_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) @@ -208,12 +206,12 @@ function split_explicit_free_surface_substep!(η, state, auxiliary, settings, we parameters = auxiliary.kernel_parameters - args = (grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) - - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, args...;) - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, args...;) + launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, + grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) return nothing end @@ -359,6 +357,17 @@ const MINIMUM_SUBSTEPS = 5 @inline calculate_adaptive_settings(substepping::FTS, substeps) = weights_from_substeps(eltype(substepping.Δt_barotropic), substeps, substepping.averaging_kernel) +macro unroll_split_explicit_loop(exp) + lim2 = eval(exp.args[1].args[2].args[3]) + iterator = exp.args[1].args[1] + loop = quote + Base.Cartesian.@nexprs $lim2 $iterator -> $(exp.args[2]) + end + return quote + $(esc(loop)) + end +end + function iterate_split_explicit!(free_surface, grid, Δt) arch = architecture(grid) @@ -375,7 +384,7 @@ function iterate_split_explicit!(free_surface, grid, Δt) Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds - for substep in 1:Nsubsteps + @unroll_split_explicit_loop for substep in 1:Nsubsteps split_explicit_free_surface_substep!(η, state, auxiliary, settings, weights, arch, grid, g, Δτᴮ, substep) end From 4b1f2cd6f1915bf96c7216fd2ce873d045e1e063 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Dec 2023 15:23:35 -0500 Subject: [PATCH 040/138] fully unrolled --- .../split_explicit_free_surface_kernels.jl | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 6bdb90369b..9dcaa86fa0 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -192,30 +192,6 @@ end end end -function split_explicit_free_surface_substep!(η, state, auxiliary, settings, weights, arch, grid, g, Δτ, substep_index) - # unpack state quantities, parameters and forcing terms - U, V = state.U, state.V - Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² - Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² - ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² - η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ - - timestepper = settings.timestepper - averaging_weight = weights[substep_index] - - parameters = auxiliary.kernel_parameters - - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, - grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) - - return nothing -end - # Barotropic Model Kernels # u_Δz = u * Δz @kernel function _barotropic_mode_kernel!(U, V, grid, u, v) @@ -384,8 +360,26 @@ function iterate_split_explicit!(free_surface, grid, Δt) Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds + # unpack state quantities, parameters and forcing terms + U, V = state.U, state.V + Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² + Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² + ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² + η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ + + timestepper = settings.timestepper + + parameters = auxiliary.kernel_parameters + @unroll_split_explicit_loop for substep in 1:Nsubsteps - split_explicit_free_surface_substep!(η, state, auxiliary, settings, weights, arch, grid, g, Δτᴮ, substep) + averaging_weight = weights[substep] + launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, + grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) end return nothing From 20a12d132b3feb710ed85a72acfc2528943546ee Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Dec 2023 19:43:33 -0500 Subject: [PATCH 041/138] split explicit loop unrolling --- .../split_explicit_free_surface.jl | 16 +-- .../split_explicit_free_surface_kernels.jl | 98 ++++++++++++++----- 2 files changed, 76 insertions(+), 38 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 8b78623673..fc4dbf33c8 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -85,20 +85,6 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid) free_surface.settings) end -function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, - settings = SplitExplicitSettings(eltype(grid); gravitational_acceleration, substeps = 200)) - - if eltype(settings) != eltype(grid) - @warn "Using $(eltype(settings)) settings for the SplitExplicitFreeSurface on a $(eltype(grid)) grid" - end - - η = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) - - return SplitExplicitFreeSurface(η, SplitExplicitState(grid), SplitExplicitAuxiliaryFields(grid), - gravitational_acceleration, settings) -end - """ struct SplitExplicitState @@ -287,7 +273,7 @@ end averaging_weights = averaging_weights[1:idx] averaging_weights ./= sum(averaging_weights) - return Δτ, averaging_weights + return Δτ, tuple(averaging_weights...) end function SplitExplicitSettings(FT::DataType=Float64; diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 9dcaa86fa0..aee662ea91 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -137,13 +137,12 @@ end using Oceananigans.DistributedComputations: Distributed using Printf -@kernel function split_explicit_free_surface_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - timestepper) +@kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) i, j = @index(Global, NTuple) - free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) end -@inline function free_surface_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +@inline function free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @@ -157,22 +156,22 @@ end return nothing end -@kernel function split_explicit_barotropic_velocity_evolution_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) +@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) i, j = @index(Global, NTuple) - velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper ) + velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper ) end -@inline function velocity_evolution_kernel!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper ) +@inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper ) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @@ -251,7 +250,7 @@ function initialize_free_surface_state!(free_surface_state, η) return nothing end -@kernel function barotropic_split_explicit_corrector_kernel!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) +@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) i, j, k = @index(Global, NTuple) @inbounds begin u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] @@ -271,7 +270,7 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid) compute_barotropic_mode!(U, V, grid, u, v) # add in "good" barotropic mode - launch!(arch, grid, :xyz, barotropic_split_explicit_corrector_kernel!, + launch!(arch, grid, :xyz, _barotropic_split_explicit_corrector!, u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) return nothing @@ -344,6 +343,54 @@ macro unroll_split_explicit_loop(exp) end end +const FixedSubstepsSetting{N} = SplitExplicitSettings{<:FixedSubstepNumber{<:Any, <:NTuple{N, <:Any}}} where N +const FixedSubstepsSplitExplicit{F} = SplitExplicitFreeSurface{<:Any, <:Any, <:Any, <:Any, <:FixedSubstepsSetting{N}} where N + +# For a fixed number of substeps it is possible to +function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, grid, Δt) where N + arch = architecture(grid) + + η = free_surface.η + state = free_surface.state + auxiliary = free_surface.auxiliary + settings = free_surface.settings + g = free_surface.gravitational_acceleration + + weights = settings.substepping.averaging_weights + fractional_Δt = settings.substepping.fractional_step_size + + Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds + + # unpack state quantities, parameters and forcing terms + U, V = state.U, state.V + Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² + Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² + ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² + η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ + + timestepper = settings.timestepper + parameters = auxiliary.kernel_parameters + + @unroll for substep in 1:N + Base.@_inline_meta + + averaging_weight = weights[substep] + + launch!(arch, grid, parameters, _split_explicit_free_surface!, + grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper) + + launch!(arch, grid, parameters, _split_explicit_barotropic_velocity!, + grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) + end + + return nothing +end + function iterate_split_explicit!(free_surface, grid, Δt) arch = architecture(grid) @@ -359,7 +406,7 @@ function iterate_split_explicit!(free_surface, grid, Δt) Nsubsteps = length(weights) Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds - + # unpack state quantities, parameters and forcing terms U, V = state.U, state.V Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² @@ -371,15 +418,20 @@ function iterate_split_explicit!(free_surface, grid, Δt) timestepper = settings.timestepper parameters = auxiliary.kernel_parameters + + @unroll for substep in 1:Nsubsteps - @unroll_split_explicit_loop for substep in 1:Nsubsteps averaging_weight = weights[substep] - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + + launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, + grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper) + launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, - grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) + timestepper) end return nothing From 58e7acbbe3f4591e79b08efb195982a0551d9f17 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:29:22 -0800 Subject: [PATCH 042/138] update --- .../distributed_grids.jl | 24 +++++++++---------- .../partition_assemble.jl | 12 +++++----- .../distributed_hydrostatic_turbulence.jl | 10 ++++---- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 27064906bd..b79705cf90 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -80,9 +80,9 @@ function RectilinearGrid(arch::Distributed, TY = insert_connected_topology(TY, Ry, rj) TZ = insert_connected_topology(TZ, Rz, rk) - xl = partition(x, nx, arch, 1) - yl = partition(y, ny, arch, 2) - zl = partition(z, nz, arch, 3) + xl = Rx == 1 ? x : partition_coordinate(x, nx, arch, 1) + yl = Ry == 1 ? y : partition_coordinate(y, ny, arch, 2) + zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) Lx, xᶠᵃᵃ, xᶜᵃᵃ, Δxᶠᵃᵃ, Δxᶜᵃᵃ = generate_coordinate(FT, topology[1](), nx, Hx, xl, :x, child_architecture(arch)) Ly, yᵃᶠᵃ, yᵃᶜᵃ, Δyᵃᶠᵃ, Δyᵃᶜᵃ = generate_coordinate(FT, topology[2](), ny, Hy, yl, :y, child_architecture(arch)) @@ -126,9 +126,9 @@ function LatitudeLongitudeGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - λl = partition(longitude, nλ, arch, 1) - φl = partition(latitude, nφ, arch, 2) - zl = partition(z, nz, arch, 3) + λl = Rx == 1 ? λ : partition_coordinate(longitude, nλ, arch, 1) + φl = Ry == 1 ? φ : partition_coordinate(latitude, nφ, arch, 2) + zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, @@ -185,9 +185,9 @@ function reconstruct_global_grid(grid::DistributedRectilinearGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - xG = Rx == 1 ? x : assemble(x, nx, Rx, ri, rj, rk, arch.communicator) - yG = Ry == 1 ? y : assemble(y, ny, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble(z, nz, Rz, rk, ri, rj, arch.communicator) + xG = Rx == 1 ? x : assemble_coordinate(x, nx, Rx, ri, rj, rk, arch.communicator) + yG = Ry == 1 ? y : assemble_coordinate(y, ny, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) @@ -228,9 +228,9 @@ function reconstruct_global_grid(grid::DistributedLatitudeLongitudeGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - λG = Rx == 1 ? λ : assemble(λ, nλ, Rx, ri, rj, rk, arch.communicator) - φG = Ry == 1 ? φ : assemble(φ, nφ, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble(z, nz, Rz, rk, ri, rj, arch.communicator) + λG = Rx == 1 ? λ : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) + φG = Ry == 1 ? φ : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index 0e96374955..c8b90e3cbb 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -37,8 +37,8 @@ end # Partitioning (localization of global objects) and assembly (global assembly of local objects) # Used for grid constructors (cpu_face_constructor_x, cpu_face_constructor_y, cpu_face_constructor_z) -# which means that we need to repeat the value at the right boundary -function partition(c::AbstractVector, n, arch, idx) +# We need to repeat the value at the right boundary +function partition_coordinate(c::AbstractVector, n, arch, idx) nl = concatenate_local_sizes(n, arch, idx) r = arch.local_index[idx] # Allow for Face values @@ -49,7 +49,7 @@ function partition(c::AbstractVector, n, arch, idx) end end -function partition(c::Tuple, n, arch, idx) +function partition_coordinate(c::Tuple, n, arch, idx) nl = concatenate_local_sizes(n, arch, idx) N = sum(nl) R = arch.ranks[idx] @@ -65,14 +65,14 @@ function partition(c::Tuple, n, arch, idx) end """ - assemble(c::AbstractVector, n, R, r, r1, r2, comm) + assemble_coordinate(c::AbstractVector, n, R, r, r1, r2, comm) Builds a linear global coordinate vector given a local coordinate vector `c_local` a local number of elements `Nc`, number of ranks `Nr`, rank `r`, and `arch`itecture. Since we use a global reduction, only ranks at positions 1 in the other two directions `r1 == 1` and `r2 == 1` fill the 1D array. """ -function assemble(c_local::AbstractVector, n, R, r, r1, r2, comm) +function assemble_coordinate(c_local::AbstractVector, n, R, r, r1, r2, comm) nl = concatenate_local_sizes(n, R, r) c_global = zeros(eltype(c_local), sum(nl)+1) @@ -88,7 +88,7 @@ function assemble(c_local::AbstractVector, n, R, r, r1, r2, comm) end # Simple case, just take the first and the last core -function assemble(c::Tuple, n, R, r, r1, r2, comm) +function assemble_coordinate(c::Tuple, n, R, r, r1, r2, comm) c_global = zeros(Float64, 2) if r == 1 && r1 == 1 && r2 == 1 diff --git a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl index fdec0fcc83..01b899e87c 100644 --- a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl +++ b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl @@ -6,6 +6,7 @@ using Statistics using Oceananigans.BoundaryConditions using Oceananigans.DistributedComputations using Random +using JLD2 using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, use_only_active_interior_cells # Run with @@ -15,14 +16,11 @@ using Oceananigans.ImmersedBoundaries: ActiveCellsIBG, use_only_active_interior_ # ``` function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded)) - grid = RectilinearGrid(arch; topology, size = (Nx, Ny, 1), extent=(4π, 4π, 0.5), halo=(7, 7, 7)) + grid = RectilinearGrid(arch; topology, size = (Nx, Ny, 10), extent=(4π, 4π, 0.5), halo=(8, 8, 8)) bottom(x, y) = (x > π && x < 3π/2 && y > π/2 && y < 3π/2) ? 1.0 : - grid.Lz - 1.0 grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom); active_cells_map = true) - @show grid isa ActiveCellsIBG - @show use_only_active_interior_cells(grid) - model = HydrostaticFreeSurfaceModel(; grid, momentum_advection = VectorInvariant(vorticity_scheme=WENO(order=9)), free_surface = SplitExplicitFreeSurface(substeps=10), @@ -65,8 +63,8 @@ function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded)) MPI.Barrier(arch.communicator) end -Nx = 128 -Ny = 128 +Nx = 32 +Ny = 32 arch = Distributed(CPU(), partition = Partition(2, 2)) From 217e3af2fb81f81f1d6912560263e7692ba99b4f Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:36:13 -0800 Subject: [PATCH 043/138] annotations --- src/Utils/kernel_launching.jl | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 1d0b744937..bdfffe6380 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -120,29 +120,39 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; only_active_cells = nothing, kwargs...) - workgroup, worksize = work_layout(grid, workspec; - include_right_boundaries, - reduced_dimensions, - location) + NVTX.@range "work layout" begin + workgroup, worksize = work_layout(grid, workspec; + include_right_boundaries, + reduced_dimensions, + location) + end - offset = offsets(workspec) + NVTX.@range "offsets" begin + offset = offsets(workspec) + end - if !isnothing(only_active_cells) - workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) - offset = nothing + NVTX.@range "active cells layout" begin + if !isnothing(only_active_cells) + workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) + offset = nothing + end end if worksize == 0 return nothing end - # We can only launch offset kernels with Static sizes!!!! - loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : - kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) + NVTX.@range "configuring kernel" begin + # We can only launch offset kernels with Static sizes!!!! + loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : + kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) + end @debug "Launching kernel $kernel! with worksize $worksize and offsets $offset from $workspec" - loop!(kernel_args...) + NVTX.@range "actual kernel" begin + loop!(kernel_args...) + end return nothing end From 8cf6453c04f83546528917f211954350c67156da Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:40:42 -0800 Subject: [PATCH 044/138] using NVTX --- src/Utils/kernel_launching.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index bdfffe6380..ac5f7b5061 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -7,7 +7,7 @@ using Oceananigans.Utils using Oceananigans.Grids using Oceananigans.Grids: AbstractGrid - +using NVTX import Base struct KernelParameters{S, O} end From 3cc1468ff94dcca95eb6f84addc1f9db1841e17f Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:29:59 -0800 Subject: [PATCH 045/138] add NVTX --- Manifest.toml | 12 ++++++------ Project.toml | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index a596e1df16..827fabbfc6 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,8 +1,8 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.9.3" +julia_version = "1.9.4" manifest_format = "2.0" -project_hash = "72ed8b1b7715053c6d7b675f75dd867b9f153685" +project_hash = "21eb6b02d2870a916430d805acf3d926ca95d5b2" [[deps.AbstractFFTs]] deps = ["LinearAlgebra"] @@ -420,12 +420,12 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" +version = "0.6.4" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" +version = "8.4.0+0" [[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] @@ -434,7 +434,7 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" +version = "1.11.0+1" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -988,7 +988,7 @@ version = "5.8.0+0" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" +version = "1.52.0+1" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] diff --git a/Project.toml b/Project.toml index 8e753f2e63..f2316435f4 100644 --- a/Project.toml +++ b/Project.toml @@ -22,6 +22,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PencilArrays = "0e08944d-e94e-41b1-9406-dcf66b6a9d2e" From 1c1ff63d737582f4ec5b2881bf24ec5bdc14b140 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Dec 2023 09:30:46 -0800 Subject: [PATCH 046/138] bugfix --- src/DistributedComputations/distributed_grids.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index b79705cf90..e8f80a0320 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -126,9 +126,9 @@ function LatitudeLongitudeGrid(arch::Distributed, TY = insert_connected_topology(topology[2], Ry, rj) TZ = insert_connected_topology(topology[3], Rz, rk) - λl = Rx == 1 ? λ : partition_coordinate(longitude, nλ, arch, 1) - φl = Ry == 1 ? φ : partition_coordinate(latitude, nφ, arch, 2) - zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) + λl = Rx == 1 ? longitude : partition_coordinate(longitude, nλ, arch, 1) + φl = Ry == 1 ? latitude : partition_coordinate(latitude, nφ, arch, 2) + zl = Rz == 1 ? z : partition_coordinate(z, nz, arch, 3) # Calculate all direction (which might be stretched) # A direction is regular if the domain passed is a Tuple{<:Real, <:Real}, @@ -228,9 +228,9 @@ function reconstruct_global_grid(grid::DistributedLatitudeLongitudeGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - λG = Rx == 1 ? λ : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) - φG = Ry == 1 ? φ : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) + λG = Rx == 1 ? longitude : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) + φG = Ry == 1 ? latitude : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) From e0bedee953ef16d05d3375fd241b9cdffafa5c72 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Dec 2023 09:57:09 -0800 Subject: [PATCH 047/138] bugfix --- src/DistributedComputations/distributed_grids.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index e8f80a0320..b6fe50b5fb 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -228,9 +228,9 @@ function reconstruct_global_grid(grid::DistributedLatitudeLongitudeGrid) z = cpu_face_constructor_z(grid) ## This will not work with 3D parallelizations!! - λG = Rx == 1 ? longitude : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) - φG = Ry == 1 ? latitude : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) - zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) + λG = Rx == 1 ? λ : assemble_coordinate(λ, nλ, Rx, ri, rj, rk, arch.communicator) + φG = Ry == 1 ? φ : assemble_coordinate(φ, nφ, Ry, rj, ri, rk, arch.communicator) + zG = Rz == 1 ? z : assemble_coordinate(z, nz, Rz, rk, ri, rj, arch.communicator) child_arch = child_architecture(arch) From b2f92ddd14be8b6837d9b5fa99d0c45184ef4803 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Dec 2023 13:45:01 -0800 Subject: [PATCH 048/138] utils --- .../split_explicit_free_surface_kernels.jl | 39 ++++++------ src/Utils/Utils.jl | 2 +- src/Utils/kernel_launching.jl | 60 ++++++++++--------- 3 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index aee662ea91..fa8709d8c8 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -372,20 +372,21 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr timestepper = settings.timestepper parameters = auxiliary.kernel_parameters + free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) + @unroll for substep in 1:N Base.@_inline_meta averaging_weight = weights[substep] - launch!(arch, grid, parameters, _split_explicit_free_surface!, - grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - timestepper) + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - launch!(arch, grid, parameters, _split_explicit_barotropic_velocity!, - grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) end return nothing @@ -419,19 +420,21 @@ function iterate_split_explicit!(free_surface, grid, Δt) parameters = auxiliary.kernel_parameters - @unroll for substep in 1:Nsubsteps + free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) - averaging_weight = weights[substep] + @unroll for substep in 1:N + Base.@_inline_meta - launch!(arch, grid, parameters, split_explicit_free_surface_evolution_kernel!, - grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - timestepper) + averaging_weight = weights[substep] - launch!(arch, grid, parameters, split_explicit_barotropic_velocity_evolution_kernel!, - grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) end return nothing diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 6c6543692c..761c8d23a6 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -1,6 +1,6 @@ module Utils -export launch_config, work_layout, launch!, KernelParameters +export configured_kernel, work_layout, launch!, KernelParameters export prettytime, pretty_filesize export tupleit, parenttuple, datatuple, datatuples export validate_intervals, time_to_run diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index ac5f7b5061..80a8307a99 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -120,43 +120,49 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; only_active_cells = nothing, kwargs...) - NVTX.@range "work layout" begin - workgroup, worksize = work_layout(grid, workspec; - include_right_boundaries, - reduced_dimensions, - location) - end - NVTX.@range "offsets" begin - offset = offsets(workspec) - end + loop! = configured_kernel(arch, grid, workspec, kernel!; + include_right_boundaries, + reduced_dimensions, + location, + only_active_cells, + kwargs...) + + loop!(kernel_args...) + + return nothing +end - NVTX.@range "active cells layout" begin - if !isnothing(only_active_cells) - workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) - offset = nothing - end +function configured_kernel(arch, grid, workspec, kernel!; + include_right_boundaries = false, + reduced_dimensions = (), + location = nothing, + only_active_cells = nothing, + kwargs...) + + workgroup, worksize = work_layout(grid, workspec; + include_right_boundaries, + reduced_dimensions, + location) + + offset = offsets(workspec) + + if !isnothing(only_active_cells) + workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) + offset = nothing end if worksize == 0 return nothing end - NVTX.@range "configuring kernel" begin - # We can only launch offset kernels with Static sizes!!!! - loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : - kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) - end + # We can only launch offset kernels with Static sizes!!!! + loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : + kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) - @debug "Launching kernel $kernel! with worksize $worksize and offsets $offset from $workspec" - - NVTX.@range "actual kernel" begin - loop!(kernel_args...) - end - - return nothing + return loop! end - + # When dims::Val @inline launch!(arch, grid, ::Val{workspec}, args...; kwargs...) where workspec = launch!(arch, grid, workspec, args...; kwargs...) From 00458ab1596f358370345a89471da89821e805ab Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Dec 2023 20:34:21 -0800 Subject: [PATCH 049/138] try like this --- .../split_explicit_free_surface_kernels.jl | 4 ++-- src/Utils/kernel_launching.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index fa8709d8c8..50a40bba6e 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -420,8 +420,8 @@ function iterate_split_explicit!(free_surface, grid, Δt) parameters = auxiliary.kernel_parameters - free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) - barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) + free_surface_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_barotropic_velocity!) @unroll for substep in 1:N Base.@_inline_meta diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 80a8307a99..543f977388 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -127,7 +127,7 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; location, only_active_cells, kwargs...) - + loop!(kernel_args...) return nothing From 08a86b52e0535c60857f26616554fc1cf2efc966 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Dec 2023 21:11:04 -0800 Subject: [PATCH 050/138] text like this --- .../split_explicit_free_surface_kernels.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 50a40bba6e..996d6be7df 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -372,8 +372,8 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr timestepper = settings.timestepper parameters = auxiliary.kernel_parameters - free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) - barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) + free_surface_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_barotropic_velocity!) @unroll for substep in 1:N Base.@_inline_meta From e402f5c79788e1d693519efdfd09bf0ff8fb0438 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 8 Dec 2023 06:28:06 -0800 Subject: [PATCH 051/138] remove reduced fields --- .../split_explicit_free_surface.jl | 32 ++++---- .../split_explicit_free_surface_kernels.jl | 76 +++++++++---------- 2 files changed, 51 insertions(+), 57 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index fc4dbf33c8..ac64f01521 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -135,17 +135,17 @@ function SplitExplicitState(grid::AbstractGrid) ηᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) ηᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - U = Field((Face, Center, Nothing), grid) - V = Field((Center, Face, Nothing), grid) + U = ZFaceField(grid, indices = (:, :, size(grid, 3))) + V = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Uᵐ⁻¹ = Field((Face, Center, Nothing), grid) - Vᵐ⁻¹ = Field((Center, Face, Nothing), grid) + Uᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Vᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Uᵐ⁻² = Field((Face, Center, Nothing), grid) - Vᵐ⁻² = Field((Center, Face, Nothing), grid) + Uᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Vᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3))) - U̅ = Field((Face, Center, Nothing), grid) - V̅ = Field((Center, Face, Nothing), grid) + U̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + V̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) return SplitExplicitState(; ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅) end @@ -183,21 +183,21 @@ Return the `SplitExplicitAuxiliaryFields` for `grid`. """ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) - Gᵁ = Field((Face, Center, Nothing), grid) - Gⱽ = Field((Center, Face, Nothing), grid) + Gᵁ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Gⱽ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶠᶜ = Field((Face, Center, Nothing), grid) - Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) + Hᶠᶜ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Hᶜᶠ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Hᶜᶜ = ZFaceField(grid, indices = (:, :, size(grid, 3))) dz = GridMetricOperation((Face, Center, Center), Δz, grid) - sum!(Hᶠᶜ, dz) + Hᶠᶜ .= sum(dz; dims = 3) dz = GridMetricOperation((Center, Face, Center), Δz, grid) - sum!(Hᶜᶠ, dz) + Hᶜᶠ .= sum(dz; dims = 3) dz = GridMetricOperation((Center, Center, Center), Δz, grid) - sum!(Hᶜᶜ, dz) + Hᶜᶜ .= sum(dz; dims = 3) fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 996d6be7df..e3b93c7ae5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -177,17 +177,17 @@ end TX, TY, _ = topology(grid) @inbounds begin - advance_previous_velocity!(i, j, 1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) - advance_previous_velocity!(i, j, 1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, 1] += Δτ * (- g * Hᶠᶜ[i, j] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, 1]) - V[i, j, 1] += Δτ * (- g * Hᶜᶠ[i, j] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, 1]) + U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) # time-averaging - η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] - U̅[i, j, 1] += averaging_weight * U[i, j, 1] - V̅[i, j, 1] += averaging_weight * V[i, j, 1] + η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] + U̅[i, j, k_top-1] += averaging_weight * U[i, j, k_top-1] + V̅[i, j, k_top-1] += averaging_weight * V[i, j, k_top-1] end end @@ -195,14 +195,15 @@ end # u_Δz = u * Δz @kernel function _barotropic_mode_kernel!(U, V, grid, u, v) i, j = @index(Global, NTuple) + k_top = grid.Nz+1 # hand unroll first loop - @inbounds U[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] - @inbounds V[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] + @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] + @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] @unroll for k in 2:grid.Nz - @inbounds U[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] - @inbounds V[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] + @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] + @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] end end @@ -211,18 +212,18 @@ end @kernel function _barotropic_mode_kernel!(U, V, grid::ActiveSurfaceIBG, u, v) idx = @index(Global, Linear) i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) + k_top = grid.Nz+1 # hand unroll first loop - @inbounds U[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] - @inbounds V[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] + @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] + @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] @unroll for k in 2:grid.Nz - @inbounds U[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] - @inbounds V[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] + @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] + @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] end end -# may need to do Val(Nk) since it may not be known at compile compute_barotropic_mode!(U, V, grid, u, v) = launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; only_active_cells = use_only_active_surface_cells(grid)) @@ -252,9 +253,11 @@ end @kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) i, j, k = @index(Global, NTuple) + k_top = grid.Nz+1 + @inbounds begin - u[i, j, k] = u[i, j, k] + (U̅[i, j] - U[i, j]) / Hᶠᶜ[i, j] - v[i, j, k] = v[i, j, k] + (V̅[i, j] - V[i, j]) / Hᶜᶠ[i, j] + u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / Hᶠᶜ[i, j, k_top-1] + v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / Hᶜᶠ[i, j, k_top-1] end end @@ -332,17 +335,6 @@ const MINIMUM_SUBSTEPS = 5 @inline calculate_adaptive_settings(substepping::FTS, substeps) = weights_from_substeps(eltype(substepping.Δt_barotropic), substeps, substepping.averaging_kernel) -macro unroll_split_explicit_loop(exp) - lim2 = eval(exp.args[1].args[2].args[3]) - iterator = exp.args[1].args[1] - loop = quote - Base.Cartesian.@nexprs $lim2 $iterator -> $(exp.args[2]) - end - return quote - $(esc(loop)) - end -end - const FixedSubstepsSetting{N} = SplitExplicitSettings{<:FixedSubstepNumber{<:Any, <:NTuple{N, <:Any}}} where N const FixedSubstepsSplitExplicit{F} = SplitExplicitFreeSurface{<:Any, <:Any, <:Any, <:Any, <:FixedSubstepsSetting{N}} where N @@ -372,8 +364,8 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr timestepper = settings.timestepper parameters = auxiliary.kernel_parameters - free_surface_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_free_surface!) - barotropic_velocity_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_barotropic_velocity!) + free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) @unroll for substep in 1:N Base.@_inline_meta @@ -420,8 +412,8 @@ function iterate_split_explicit!(free_surface, grid, Δt) parameters = auxiliary.kernel_parameters - free_surface_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_free_surface!) - barotropic_velocity_kernel! = configured_kernel(arch, grid, :xy, _split_explicit_barotropic_velocity!) + free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) + barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) @unroll for substep in 1:N Base.@_inline_meta @@ -443,14 +435,15 @@ end # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) i, j = @index(Global, NTuple) + k_top = grid.Nz+1 # hand unroll first loop - @inbounds Gᵁ[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) @unroll for k in 2:grid.Nz - @inbounds Gᵁ[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) + @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) end end @@ -458,14 +451,15 @@ end @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid::ActiveSurfaceIBG, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) idx = @index(Global, Linear) i, j = active_linear_index_to_tuple(idx, SurfaceMap(), grid) + k_top = grid.Nz+1 # hand unroll first loop - @inbounds Gᵁ[i, j, 1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) @unroll for k in 2:grid.Nz - @inbounds Gᵁ[i, j, 1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, 1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) + @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) + @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) end end From 6cf89bc50a0810e6de9e5b51252c739c1f0ad5d2 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 8 Dec 2023 07:37:49 -0800 Subject: [PATCH 052/138] small test --- .../split_explicit_free_surface_kernels.jl | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index e3b93c7ae5..b3e2eb8eac 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -101,9 +101,9 @@ end # Time stepping extrapolation U★, and η★ # AB3 step -@inline function U★(i, j, k, grid, ::AdamsBashforth3Scheme, ϕᵐ, ϕᵐ⁻¹, ϕᵐ⁻²) +@inline function U★(i, j, k, grid, ::AdamsBashforth3Scheme, Uᵐ, Uᵐ⁻¹, Uᵐ⁻²) FT = eltype(grid) - return @inbounds FT(α) * ϕᵐ[i, j, k] + FT(θ) * ϕᵐ⁻¹[i, j, k] + FT(β) * ϕᵐ⁻²[i, j, k] + return @inbounds FT(α) * Uᵐ[i, j, k] + FT(θ) * Uᵐ⁻¹[i, j, k] + FT(β) * Uᵐ⁻²[i, j, k] end @inline function η★(i, j, k, grid, ::AdamsBashforth3Scheme, ηᵐ⁺¹, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) @@ -112,7 +112,7 @@ end end # Forward Backward Step -@inline U★(i, j, k, grid, ::ForwardBackwardScheme, ϕ, args...) = @inbounds ϕ[i, j, k] +@inline U★(i, j, k, grid, ::ForwardBackwardScheme, U, args...) = @inbounds U[i, j, k] @inline η★(i, j, k, grid, ::ForwardBackwardScheme, η, args...) = @inbounds η[i, j, k] @inline advance_previous_velocity!(i, j, k, ::ForwardBackwardScheme, U, Uᵐ⁻¹, Uᵐ⁻²) = nothing @@ -137,38 +137,38 @@ end using Oceananigans.DistributedComputations: Distributed using Printf -@kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +@kernel function _split_explicit_free_surface!(grid, Δτ, η, U, V, timestepper) i, j = @index(Global, NTuple) - free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + free_surface_evolution!(i, j, grid, Δτ, η, U, V, timestepper) end -@inline function free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) +@inline function free_surface_evolution!(i, j, grid, Δτ, η, U, V, timestepper) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @inbounds begin advance_previous_free_surface!(i, j, k_top, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) - η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + - div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²)) + η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, 0, 0) + + div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, 0, 0)) end return nothing end -@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², +@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) - velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + velocity_evolution!(i, j, grid, Δτ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper ) + timestepper) end -@inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², +@inline function velocity_evolution!(i, j, grid, Δτ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper ) @@ -177,12 +177,12 @@ end TX, TY, _ = topology(grid) @inbounds begin - advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) - advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) + # advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + # advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) - V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) + U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, k_top-1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, 0, 0, 0) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, k_top-1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, 0, 0, 0) + Gⱽ[i, j, k_top-1]) # time-averaging η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] @@ -372,10 +372,9 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + free_surface_kernel!(grid, Δτᴮ, η, U, V, timestepper) + + barotropic_velocity_kernel!(grid, Δτᴮ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) @@ -420,10 +419,9 @@ function iterate_split_explicit!(free_surface, grid, Δt) averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + free_surface_kernel!(grid, Δτᴮ, η, U, V, timestepper) - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + barotropic_velocity_kernel!(grid, Δτᴮ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) From 37698738c27ba5c86f34d164fee377cbd0924a94 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:56:21 -0500 Subject: [PATCH 053/138] small change --- .../split_explicit_free_surface_kernels.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index b3e2eb8eac..b1ae2f1476 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -272,7 +272,6 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid) # !!!! reusing U and V for this storage since last timestep doesn't matter compute_barotropic_mode!(U, V, grid, u, v) # add in "good" barotropic mode - launch!(arch, grid, :xyz, _barotropic_split_explicit_corrector!, u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) From d60b64322192bf4a22b576d76cba353faaca2a61 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:12:26 -0500 Subject: [PATCH 054/138] nvtx on fill halos --- src/DistributedComputations/halo_communication.jl | 5 ++++- .../split_explicit_free_surface_kernels.jl | 1 - .../update_hydrostatic_free_surface_model_state.jl | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 87865c75ab..0e27c0352f 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -189,7 +189,6 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib if !only_local_halos # Then we need to fill the `send` buffers fill_send_buffers!(c, buffers, grid, Val(buffer_side)) - sync_device!(arch) end # Calculate size and offset of the fill_halo kernel @@ -244,6 +243,8 @@ for (side, opposite_side) in zip([:west, :south], [:east, :north]) function $fill_both_halo!(c, bc_side::DCBCT, bc_opposite_side::DCBCT, size, offset, loc, arch::Distributed, grid::DistributedGrid, buffers, args...; only_local_halos = false, kwargs...) + sync_device!(arch) + only_local_halos && return nothing @assert bc_side.condition.from == bc_opposite_side.condition.from # Extra protection in case of bugs @@ -273,6 +274,8 @@ for side in [:west, :east, :south, :north] function $fill_side_halo!(c, bc_side::DCBCT, size, offset, loc, arch::Distributed, grid::DistributedGrid, buffers, args...; only_local_halos = false, kwargs...) + sync_device!(arch) + only_local_halos && return nothing child_arch = child_architecture(arch) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index b1ae2f1476..7b2e5507ee 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -161,7 +161,6 @@ end Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) - velocity_evolution!(i, j, grid, Δτ, η, U, V, η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 4477505945..7613b37745 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -12,6 +12,7 @@ import Oceananigans.TimeSteppers: update_state! import Oceananigans.Models.NonhydrostaticModels: compute_auxiliaries! using Oceananigans.Models: update_model_field_time_series! +using NVTX compute_auxiliary_fields!(auxiliary_fields) = Tuple(compute!(a) for a in auxiliary_fields) @@ -35,8 +36,10 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp # Update possible FieldTimeSeries used in the model @apply_regionally update_model_field_time_series!(model, model.clock) - fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) - + NVTX.@range "fill_halo_regions!" begin + fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) + end + @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) From 1b0a4404ac24eddee562c6511a64f31790ffcafa Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:14:13 -0500 Subject: [PATCH 055/138] all NVTX --- .../halo_communication.jl | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 0e27c0352f..3a0f83d701 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -22,6 +22,8 @@ import Oceananigans.BoundaryConditions: fill_south_and_north_halo!, fill_bottom_and_top_halo! +using NVTX + ##### ##### MPI tags for halo communication BCs ##### @@ -104,7 +106,9 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::Distributed number_of_tasks = length(fill_halos!) for task = 1:number_of_tasks - fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) + NVTX.@range "fill_halo_event" begin + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) + end end fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) @@ -187,15 +191,21 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib buffer_side = communication_side(Val(fill_halos!)) - if !only_local_halos # Then we need to fill the `send` buffers - fill_send_buffers!(c, buffers, grid, Val(buffer_side)) + NVTX.@range "fill_send_halo" begin + if !only_local_halos # Then we need to fill the `send` buffers + fill_send_buffers!(c, buffers, grid, Val(buffer_side)) + end end # Calculate size and offset of the fill_halo kernel # We assume that the kernel size is the same for west and east boundaries, # south and north boundaries and bottom and top boundaries - size = fill_halo_size(c, fill_halos!, indices, bcs[1], loc, grid) - offset = fill_halo_offset(size, fill_halos!, indices) + NVTX.@range "fill_halo_size" begin + size = fill_halo_size(c, fill_halos!, indices, bcs[1], loc, grid) + end + NVTX.@range "fill_halo_offsets" begin + offset = fill_halo_offset(size, fill_halos!, indices) + end requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) From 6f9d400b57c0fc87f142fecbcd4e9ed1eddd23da Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:15:10 -0500 Subject: [PATCH 056/138] fill it all --- src/DistributedComputations/halo_communication.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 3a0f83d701..e80a448155 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -207,10 +207,14 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib offset = fill_halo_offset(size, fill_halos!, indices) end - requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) - - pool_requests_or_complete_comm!(c, arch, grid, buffers, requests, async, buffer_side) + NVTX.@range "actual fill_halos!" begin + requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) + end + NVTX.@range "pool_request" begin + pool_requests_or_complete_comm!(c, arch, grid, buffers, requests, async, buffer_side) + end + return nothing end From ea5e56bba0d89817b21ce52d846aa039f241924c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:26:55 -0500 Subject: [PATCH 057/138] check it out --- .../split_explicit_free_surface.jl | 30 ++++++++------ .../split_explicit_free_surface_kernels.jl | 39 +++++++++++-------- 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index ac64f01521..7ebccb14aa 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -79,7 +79,7 @@ SplitExplicitFreeSurface(FT::DataType = Float64; gravitational_acceleration = g_ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid) η = FreeSurfaceDisplacementField(velocities, free_surface, grid) - return SplitExplicitFreeSurface(η, SplitExplicitState(grid), + return SplitExplicitFreeSurface(η, SplitExplicitState(grid, free_surface.settings.timestepper), SplitExplicitAuxiliaryFields(grid), free_surface.gravitational_acceleration, free_surface.settings) @@ -128,24 +128,23 @@ Note that `η̅` is solely used for setting the `η` at the next substep iterati acts as a filter for `η`. Values with superscripts `m-1` and `m-2` correspond to previous stored time steps to allow using a higher-order time stepping scheme, e.g., `AdamsBashforth3Scheme`. """ -function SplitExplicitState(grid::AbstractGrid) +function SplitExplicitState(grid::AbstractGrid, timestepper) η̅ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - ηᵐ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - ηᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) - ηᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + ηᵐ = auxiliary_free_surface_field(grid, timestepper) + ηᵐ⁻¹ = auxiliary_free_surface_field(grid, timestepper) + ηᵐ⁻² = auxiliary_free_surface_field(grid, timestepper) U = ZFaceField(grid, indices = (:, :, size(grid, 3))) V = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Uᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Vᵐ⁻¹ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Uᵐ⁻¹ = auxiliary_barotropic_velocity_field(grid, timestepper) + Vᵐ⁻¹ = auxiliary_barotropic_velocity_field(grid, timestepper) + Uᵐ⁻² = auxiliary_barotropic_velocity_field(grid, timestepper) + Vᵐ⁻² = auxiliary_barotropic_velocity_field(grid, timestepper) - Uᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Vᵐ⁻² = ZFaceField(grid, indices = (:, :, size(grid, 3))) - - U̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - V̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + U̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + V̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) return SplitExplicitState(; ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅) end @@ -221,6 +220,13 @@ end struct AdamsBashforth3Scheme end struct ForwardBackwardScheme end + +auxiliary_free_surface_field(grid, ::AdamsBashforth3Scheme) = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) +auxiliary_free_surface_field(grid, ::ForwardBackwardScheme) = nothing + +auxiliary_barotropic_velocity_field(grid, ::AdamsBashforth3Scheme) = ZFaceField(grid, indices = (:, :, size(grid, 3))) +auxiliary_barotropic_velocity_field(grid, ::ForwardBackwardScheme) = nothing + # (p = 2, q = 4, r = 0.18927) minimize dispersion error from Shchepetkin and McWilliams (2005): https://doi.org/10.1016/j.ocemod.2004.08.002 @inline function averaging_shape_function(τ::FT; p = 2, q = 4, r = FT(0.18927)) where FT τ₀ = (p + 2) * (p + q + 2) / (p + 1) / (p + q + 1) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 7b2e5507ee..7bda6ca567 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -137,51 +137,54 @@ end using Oceananigans.DistributedComputations: Distributed using Printf -@kernel function _split_explicit_free_surface!(grid, Δτ, η, U, V, timestepper) +@kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) i, j = @index(Global, NTuple) - free_surface_evolution!(i, j, grid, Δτ, η, U, V, timestepper) + free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) end -@inline function free_surface_evolution!(i, j, grid, Δτ, η, U, V, timestepper) +@inline function free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @inbounds begin advance_previous_free_surface!(i, j, k_top, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) - η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, 0, 0) + - div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, 0, 0)) + η[i, j, k_top] -= Δτ * (div_xᶜᶜᶠ_U(i, j, k_top-1, grid, TX, U★, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + + div_yᶜᶜᶠ_V(i, j, k_top-1, grid, TY, U★, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²)) end return nothing end -@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, U, V, +@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) - velocity_evolution!(i, j, grid, Δτ, η, U, V, + velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) end -@inline function velocity_evolution!(i, j, grid, Δτ, η, U, V, +@inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper ) + timestepper) k_top = grid.Nz+1 TX, TY, _ = topology(grid) @inbounds begin - # advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) - # advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, U, Uᵐ⁻¹, Uᵐ⁻²) + advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, k_top-1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, 0, 0, 0) + Gᵁ[i, j, k_top-1]) - V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, k_top-1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, 0, 0, 0) + Gⱽ[i, j, k_top-1]) + U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, k_top-1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, k_top-1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) # time-averaging η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] @@ -370,9 +373,10 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτᴮ, η, U, V, timestepper) + free_surface_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - barotropic_velocity_kernel!(grid, Δτᴮ, η, U, V, + barotropic_velocity_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) @@ -417,9 +421,10 @@ function iterate_split_explicit!(free_surface, grid, Δt) averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτᴮ, η, U, V, timestepper) + free_surface_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - barotropic_velocity_kernel!(grid, Δτᴮ, η, U, V, + barotropic_velocity_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) From 47dd5698ed12f6735600bd497c2194472f42fc6a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:56:51 -0500 Subject: [PATCH 058/138] bugfixxed --- .../distributed_split_explicit_free_surface.jl | 2 +- .../HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl | 2 +- src/MultiRegion/multi_region_split_explicit_free_surface.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index 679889a888..6f5067e555 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -75,7 +75,7 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid::D η = ZFaceField(new_grid, indices = (:, :, size(new_grid, 3)+1)) return SplitExplicitFreeSurface(η, - SplitExplicitState(new_grid), + SplitExplicitState(new_grid, settings.timestepper), SplitExplicitAuxiliaryFields(new_grid), free_surface.gravitational_acceleration, free_surface.settings) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 7ebccb14aa..ca549d94b3 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -120,7 +120,7 @@ Base.@kwdef struct SplitExplicitState{𝒞𝒞, ℱ𝒞, 𝒞ℱ} end """ - SplitExplicitState(grid) + SplitExplicitState(grid, timestepper) Return the split-explicit state for `grid`. diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index 7349324fec..c645452038 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -55,7 +55,7 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid::M η = ZFaceField(new_grid, indices = (:, :, size(new_grid, 3)+1)) return SplitExplicitFreeSurface(η, - SplitExplicitState(new_grid), + SplitExplicitState(new_grid, free_surface.settings.timestepper), SplitExplicitAuxiliaryFields(new_grid), free_surface.gravitational_acceleration, free_surface.settings) From 62dad92b1653ac250ab81cb3111d1dd543ed6e1d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 17:03:40 -0500 Subject: [PATCH 059/138] bugfixed --- .../split_explicit_free_surface.jl | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index ca549d94b3..d8173550da 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -92,31 +92,31 @@ A type containing the state fields for the split-explicit free surface. $(FIELDS) """ -Base.@kwdef struct SplitExplicitState{𝒞𝒞, ℱ𝒞, 𝒞ℱ} +Base.@kwdef struct SplitExplicitState{CC, ACC, FC, AFC, CF, ACF} "The free surface at time `m`. (`ReducedField` over ``z``)" - ηᵐ :: 𝒞𝒞 + ηᵐ :: ACC "The free surface at time `m-1`. (`ReducedField` over ``z``)" - ηᵐ⁻¹ :: 𝒞𝒞 + ηᵐ⁻¹ :: ACC "The free surface at time `m-2`. (`ReducedField` over ``z``)" - ηᵐ⁻² :: 𝒞𝒞 + ηᵐ⁻² :: ACC "The barotropic zonal velocity at time `m`. (`ReducedField` over ``z``)" - U :: ℱ𝒞 + U :: FC "The barotropic zonal velocity at time `m-1`. (`ReducedField` over ``z``)" - Uᵐ⁻¹ :: ℱ𝒞 + Uᵐ⁻¹ :: AFC "The barotropic zonal velocity at time `m-2`. (`ReducedField` over ``z``)" - Uᵐ⁻² :: ℱ𝒞 + Uᵐ⁻² :: AFC "The barotropic meridional velocity at time `m`. (`ReducedField` over ``z``)" - V :: 𝒞ℱ + V :: CF "The barotropic meridional velocity at time `m-1`. (`ReducedField` over ``z``)" - Vᵐ⁻¹ :: 𝒞ℱ + Vᵐ⁻¹ :: ACF "The barotropic meridional velocity at time `m-2`. (`ReducedField` over ``z``)" - Vᵐ⁻² :: 𝒞ℱ + Vᵐ⁻² :: ACF "The time-filtered free surface. (`ReducedField` over ``z``)" - η̅ :: 𝒞𝒞 + η̅ :: CC "The time-filtered barotropic zonal velocity. (`ReducedField` over ``z``)" - U̅ :: ℱ𝒞 + U̅ :: FC "The time-filtered barotropic meridional velocity. (`ReducedField` over ``z``)" - V̅ :: 𝒞ℱ + V̅ :: FC end """ From 9d5ada270477453ce4e71959b0cd8521156ed6c7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 11 Dec 2023 20:42:34 -0500 Subject: [PATCH 060/138] bugfix --- .../split_explicit_free_surface_kernels.jl | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 7bda6ca567..154756ad5b 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -230,12 +230,23 @@ compute_barotropic_mode!(U, V, grid, u, v) = launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; only_active_cells = use_only_active_surface_cells(grid)) -function initialize_free_surface_state!(free_surface_state, η) - state = free_surface_state +function initialize_free_surface_state!(state, η, timestepper) parent(state.U) .= parent(state.U̅) parent(state.V) .= parent(state.V̅) + initialize_auxiliary_state!(state, η, timestepper) + + fill!(state.η̅, 0) + fill!(state.U̅, 0) + fill!(state.V̅, 0) + + return nothing +end + +initialize_auxiliary_state!(state, η, ::ForwardBackwardScheme) = nothing + +function initialize_auxiliary_state!(state, η, timestepper) parent(state.Uᵐ⁻¹) .= parent(state.U̅) parent(state.Vᵐ⁻¹) .= parent(state.V̅) @@ -246,10 +257,6 @@ function initialize_free_surface_state!(free_surface_state, η) parent(state.ηᵐ⁻¹) .= parent(η) parent(state.ηᵐ⁻²) .= parent(η) - fill!(state.η̅, 0) - fill!(state.U̅, 0) - fill!(state.V̅, 0) - return nothing end @@ -302,7 +309,7 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac # reset free surface averages @apply_regionally begin - initialize_free_surface_state!(free_surface.state, free_surface.η) + initialize_free_surface_state!(free_surface.state, free_surface.η, free_surface.settings.timestepper) # Solve for the free surface at tⁿ⁺¹ iterate_split_explicit!(free_surface, free_surface_grid, Δt) # Reset eta for the next timestep @@ -373,9 +380,9 @@ function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, gr averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - barotropic_velocity_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, @@ -421,9 +428,9 @@ function iterate_split_explicit!(free_surface, grid, Δt) averaging_weight = weights[substep] - free_surface_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - barotropic_velocity_kernel!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, From 76bfb5eb30eaf13dbb5c00e1fe4fc74ffe1d8178 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 10:39:55 -0500 Subject: [PATCH 061/138] annotate the convert --- src/Architectures.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Architectures.jl b/src/Architectures.jl index 6d3da62098..95c9128b4b 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -8,6 +8,11 @@ using CUDA using KernelAbstractions using Adapt using OffsetArrays +using NVTX + +NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDABackend}, arg) + CUDA.cudaconvert(arg) +end """ AbstractArchitecture From 3f645ce5e68f44f5fe911a3208b97891bf04cfb0 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 10:40:26 -0500 Subject: [PATCH 062/138] bugfix --- src/Architectures.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 95c9128b4b..49cb7fd56d 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -10,7 +10,7 @@ using Adapt using OffsetArrays using NVTX -NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDABackend}, arg) +NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDAl.CUDABackend}, arg) CUDA.cudaconvert(arg) end From 324aaefe11636d08e960c33c62c2bf4711e8e285 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 10:41:40 -0500 Subject: [PATCH 063/138] bugfix --- src/Architectures.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 49cb7fd56d..36e340d648 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -10,7 +10,7 @@ using Adapt using OffsetArrays using NVTX -NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDAl.CUDABackend}, arg) +NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDA.CUDABackend}, arg) CUDA.cudaconvert(arg) end From 67df158aff546983749dac1f781daf7c3bec1052 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:25:49 -0500 Subject: [PATCH 064/138] add cudaconvert --- src/Architectures.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Architectures.jl b/src/Architectures.jl index 36e340d648..a5803cc163 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -14,6 +14,14 @@ NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k:: CUDA.cudaconvert(arg) end +import CUDA: cudaconvert + +NVTX.@annotate "cudaconvert function" function CUDA.cudaconvert(arg) + NVTX.@range "inside convert function" begin + Adapt.adapt(CUDA.KernelAdaptor(), arg) + end +end + """ AbstractArchitecture From 74d3badaf1ade385da3fa267088784f5af625a48 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:10:19 -0500 Subject: [PATCH 065/138] remove NVTX --- src/Architectures.jl | 12 --------- .../halo_communication.jl | 26 +++++-------------- ...te_hydrostatic_free_surface_model_state.jl | 4 +-- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index a5803cc163..54d7a95fc1 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -10,18 +10,6 @@ using Adapt using OffsetArrays using NVTX -NVTX.@annotate "cudaconvert function" function KernelAbstractions.argconvert(k::KernelAbstractions.Kernel{CUDA.CUDABackend}, arg) - CUDA.cudaconvert(arg) -end - -import CUDA: cudaconvert - -NVTX.@annotate "cudaconvert function" function CUDA.cudaconvert(arg) - NVTX.@range "inside convert function" begin - Adapt.adapt(CUDA.KernelAdaptor(), arg) - end -end - """ AbstractArchitecture diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index e80a448155..5d011acafe 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -106,9 +106,7 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::Distributed number_of_tasks = length(fill_halos!) for task = 1:number_of_tasks - NVTX.@range "fill_halo_event" begin - fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) - end + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) end fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) @@ -191,29 +189,19 @@ function fill_halo_event!(c, fill_halos!, bcs, indices, loc, arch, grid::Distrib buffer_side = communication_side(Val(fill_halos!)) - NVTX.@range "fill_send_halo" begin - if !only_local_halos # Then we need to fill the `send` buffers - fill_send_buffers!(c, buffers, grid, Val(buffer_side)) - end + if !only_local_halos # Then we need to fill the `send` buffers + fill_send_buffers!(c, buffers, grid, Val(buffer_side)) end # Calculate size and offset of the fill_halo kernel # We assume that the kernel size is the same for west and east boundaries, # south and north boundaries and bottom and top boundaries - NVTX.@range "fill_halo_size" begin - size = fill_halo_size(c, fill_halos!, indices, bcs[1], loc, grid) - end - NVTX.@range "fill_halo_offsets" begin - offset = fill_halo_offset(size, fill_halos!, indices) - end + size = fill_halo_size(c, fill_halos!, indices, bcs[1], loc, grid) + offset = fill_halo_offset(size, fill_halos!, indices) - NVTX.@range "actual fill_halos!" begin - requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) - end + requests = fill_halos!(c, bcs..., size, offset, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) - NVTX.@range "pool_request" begin - pool_requests_or_complete_comm!(c, arch, grid, buffers, requests, async, buffer_side) - end + pool_requests_or_complete_comm!(c, arch, grid, buffers, requests, async, buffer_side) return nothing end diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 7613b37745..43f53a766f 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -36,9 +36,7 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp # Update possible FieldTimeSeries used in the model @apply_regionally update_model_field_time_series!(model, model.clock) - NVTX.@range "fill_halo_regions!" begin - fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) - end + fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) From 955d2c16645c89a664f58c3884e52236d3291b0e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:49:51 -0500 Subject: [PATCH 066/138] model grid --- .../split_explicit_free_surface_kernels.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 154756ad5b..eb2b8bbbe0 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -477,8 +477,6 @@ end # Setting up the RHS for the barotropic step (tendencies of the barotopic velocity components) # This function is called after `calculate_tendency` and before `ab2_step_velocities!` function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) - - free_surface_grid = free_surface.η.grid # we start the time integration of η from the average ηⁿ Gu⁻ = model.timestepper.G⁻.u @@ -488,7 +486,7 @@ function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) auxiliary = free_surface.auxiliary - @apply_regionally setup_split_explicit_tendency!(auxiliary, free_surface_grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) + @apply_regionally setup_split_explicit_tendency!(auxiliary, model.grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) fields_to_fill = (auxiliary.Gᵁ, auxiliary.Gⱽ) fill_halo_regions!(fields_to_fill; async = true) From 15f60f73a2c79488faa3fea99607a3292c909f6a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:35:51 -0500 Subject: [PATCH 067/138] try like this? --- .../split_explicit_free_surface_kernels.jl | 96 +++++++------------ src/Utils/kernel_launching.jl | 34 +++++++ 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index eb2b8bbbe0..80a46304e0 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -193,6 +193,34 @@ end end end +@kernel function _iterate_split_explicit!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, weights, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper, + free_surface_kernel!, + barotropic_velocity_kernel!, + ::Val{N}) where N + + @unroll for substep in 1:N + Base.@_inline_meta + + averaging_weight = weights[substep] + + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper; + dynamic_launch=true) + + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper; + dynamic_launch=true) + end +end + # Barotropic Model Kernels # u_Δz = u * Δz @kernel function _barotropic_mode_kernel!(U, V, grid, u, v) @@ -346,52 +374,6 @@ const MINIMUM_SUBSTEPS = 5 const FixedSubstepsSetting{N} = SplitExplicitSettings{<:FixedSubstepNumber{<:Any, <:NTuple{N, <:Any}}} where N const FixedSubstepsSplitExplicit{F} = SplitExplicitFreeSurface{<:Any, <:Any, <:Any, <:Any, <:FixedSubstepsSetting{N}} where N -# For a fixed number of substeps it is possible to -function iterate_split_explicit!(free_surface::FixedSubstepsSplitExplicit{N}, grid, Δt) where N - arch = architecture(grid) - - η = free_surface.η - state = free_surface.state - auxiliary = free_surface.auxiliary - settings = free_surface.settings - g = free_surface.gravitational_acceleration - - weights = settings.substepping.averaging_weights - fractional_Δt = settings.substepping.fractional_step_size - - Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds - - # unpack state quantities, parameters and forcing terms - U, V = state.U, state.V - Uᵐ⁻¹, Uᵐ⁻² = state.Uᵐ⁻¹, state.Uᵐ⁻² - Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² - ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² - η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ - - timestepper = settings.timestepper - parameters = auxiliary.kernel_parameters - - free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) - barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) - - @unroll for substep in 1:N - Base.@_inline_meta - - averaging_weight = weights[substep] - - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) - end - - return nothing -end - function iterate_split_explicit!(free_surface, grid, Δt) arch = architecture(grid) @@ -423,19 +405,15 @@ function iterate_split_explicit!(free_surface, grid, Δt) free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) - @unroll for substep in 1:N - Base.@_inline_meta - - averaging_weight = weights[substep] - - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) - - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) - end + launch!(arch, grid, 1, _iterate_split_explicit!, + grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, weights, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper, + free_surface_kernel!, + barotropic_velocity_kernel!, + Val(N)) return nothing end diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 543f977388..6d98f5d131 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -279,3 +279,37 @@ function partition(kernel::OffsetKernel, inrange, ingroupsize) return iterspace, dynamic end +##### +##### Add Dynamic kernels to KA +##### + +using KernelAbstractions +using CUDA: CUDABackend + +const KA = KernelAbstractions + +function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) + backend = KA.backend(obj) + + ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) + # this might not be the final context, since we may tune the workgroupsize + ctx = KA.mkcontext(obj, ndrange, iterspace) + + maxthreads = prod(KA.get(KA.workgroupsize(obj))) + + kernel = @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads dynamic=dynamic_launch obj.f(ctx, args...) + + blocks = length(KA.blocks(iterspace)) + threads = length(KA.workitems(iterspace)) + + if blocks == 0 + return nothing + end + + # Launch kernel + kernel(ctx, args...; threads, blocks) + + return nothing +end + + From 3c8e34fe7c4f9fdd7cfff32b43a9c68c9b31afdc Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:46:20 -0500 Subject: [PATCH 068/138] bugfix --- src/Utils/kernel_launching.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 6d98f5d131..162e201e29 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -284,10 +284,13 @@ end ##### using KernelAbstractions -using CUDA: CUDABackend +using CUDA: CUDABackend, @cuda const KA = KernelAbstractions +(obj::KA.Kernel)(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) = + obj(args...; ndrange, workgroupsize) + function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) backend = KA.backend(obj) From 246c6d97d47eefa8cb69e466c238ed95127ee35e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:54:27 -0500 Subject: [PATCH 069/138] fix --- src/Utils/kernel_launching.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 162e201e29..d925165509 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -300,7 +300,11 @@ function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=n maxthreads = prod(KA.get(KA.workgroupsize(obj))) - kernel = @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads dynamic=dynamic_launch obj.f(ctx, args...) + kernel = if dynamic_launch + @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads dynamic=true obj.f(ctx, args...) + else + @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads obj.f(ctx, args...) + end blocks = length(KA.blocks(iterspace)) threads = length(KA.workitems(iterspace)) From 837a119c0ea6fa4ad1957fd7c2fe710e05577372 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:57:16 -0500 Subject: [PATCH 070/138] should work? --- src/Utils/kernel_launching.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index d925165509..2102cca4b0 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -301,7 +301,7 @@ function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=n maxthreads = prod(KA.get(KA.workgroupsize(obj))) kernel = if dynamic_launch - @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads dynamic=true obj.f(ctx, args...) + @cuda launch=false dynamic=true obj.f(ctx, args...) else @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads obj.f(ctx, args...) end From 148a2c87f16e35fff6c28b47accb07091be0dd27 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:58:12 -0500 Subject: [PATCH 071/138] add here --- src/Utils/kernel_launching.jl | 42 ----------------------------------- 1 file changed, 42 deletions(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 2102cca4b0..f43be9a3d2 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -278,45 +278,3 @@ function partition(kernel::OffsetKernel, inrange, ingroupsize) return iterspace, dynamic end - -##### -##### Add Dynamic kernels to KA -##### - -using KernelAbstractions -using CUDA: CUDABackend, @cuda - -const KA = KernelAbstractions - -(obj::KA.Kernel)(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) = - obj(args...; ndrange, workgroupsize) - -function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) - backend = KA.backend(obj) - - ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) - # this might not be the final context, since we may tune the workgroupsize - ctx = KA.mkcontext(obj, ndrange, iterspace) - - maxthreads = prod(KA.get(KA.workgroupsize(obj))) - - kernel = if dynamic_launch - @cuda launch=false dynamic=true obj.f(ctx, args...) - else - @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads obj.f(ctx, args...) - end - - blocks = length(KA.blocks(iterspace)) - threads = length(KA.workitems(iterspace)) - - if blocks == 0 - return nothing - end - - # Launch kernel - kernel(ctx, args...; threads, blocks) - - return nothing -end - - From 0cf5c772b0303d4db694f2618bb83f6cc8f49193 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:58:19 -0500 Subject: [PATCH 072/138] add here --- src/Utils/Utils.jl | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 761c8d23a6..1704375cd5 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -42,4 +42,46 @@ include("multi_region_transformation.jl") include("coordinate_transformations.jl") include("sum_of_arrays.jl") + +##### +##### Add Dynamic kernels to KA +##### + +using KernelAbstractions +using CUDA: CUDABackend, @cuda + +const KA = KernelAbstractions + +(obj::KA.Kernel)(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) = + obj(args...; ndrange, workgroupsize) + +function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) + backend = KA.backend(obj) + + ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) + # this might not be the final context, since we may tune the workgroupsize + ctx = KA.mkcontext(obj, ndrange, iterspace) + + maxthreads = prod(KA.get(KA.workgroupsize(obj))) + + kernel = if dynamic_launch + @cuda launch=false dynamic=true obj.f(ctx, args...) + else + @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads obj.f(ctx, args...) + end + + blocks = length(KA.blocks(iterspace)) + threads = length(KA.workitems(iterspace)) + + if blocks == 0 + return nothing + end + + # Launch kernel + kernel(ctx, args...; threads, blocks) + + return nothing +end + + end # module From d1f4f8380347ef8d3b1bd5a46850e12c35db4f85 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 12 Dec 2023 23:01:37 -0500 Subject: [PATCH 073/138] bugfix --- .../split_explicit_free_surface_kernels.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 80a46304e0..a7345120f5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -413,7 +413,7 @@ function iterate_split_explicit!(free_surface, grid, Δt) timestepper, free_surface_kernel!, barotropic_velocity_kernel!, - Val(N)) + Val(Nsubsteps)) return nothing end From 41a085785f4684f9b11b105a08ca0115e50c8155 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:24:41 -0500 Subject: [PATCH 074/138] back to how it was --- src/BoundaryConditions/fill_halo_regions.jl | 2 +- .../split_explicit_free_surface_kernels.jl | 52 ++++++------------- src/Utils/Utils.jl | 42 --------------- 3 files changed, 16 insertions(+), 80 deletions(-) diff --git a/src/BoundaryConditions/fill_halo_regions.jl b/src/BoundaryConditions/fill_halo_regions.jl index 9097cc0254..67c0c66117 100644 --- a/src/BoundaryConditions/fill_halo_regions.jl +++ b/src/BoundaryConditions/fill_halo_regions.jl @@ -50,7 +50,7 @@ function fill_halo_regions!(c::MaybeTupledData, boundary_conditions, indices, lo arch = architecture(grid) - fill_halos!, bcs = permute_boundary_conditions(boundary_conditions) + fill_halos!, bcs = permute_boundary_conditions(boundary_conditions) number_of_tasks = length(fill_halos!) # Fill halo in the three permuted directions (1, 2, and 3), making sure dependencies are fulfilled diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index a7345120f5..7237052967 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -193,34 +193,6 @@ end end end -@kernel function _iterate_split_explicit!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, weights, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper, - free_surface_kernel!, - barotropic_velocity_kernel!, - ::Val{N}) where N - - @unroll for substep in 1:N - Base.@_inline_meta - - averaging_weight = weights[substep] - - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - timestepper; - dynamic_launch=true) - - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper; - dynamic_launch=true) - end -end - # Barotropic Model Kernels # u_Δz = u * Δz @kernel function _barotropic_mode_kernel!(U, V, grid, u, v) @@ -405,15 +377,21 @@ function iterate_split_explicit!(free_surface, grid, Δt) free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) - launch!(arch, grid, 1, _iterate_split_explicit!, - grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, weights, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper, - free_surface_kernel!, - barotropic_velocity_kernel!, - Val(Nsubsteps)) + @unroll for substep in 1:N + Base.@_inline_meta + + averaging_weight = weights[substep] + + free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper) + + barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, averaging_weight, + Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) + end return nothing end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 1704375cd5..761c8d23a6 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -42,46 +42,4 @@ include("multi_region_transformation.jl") include("coordinate_transformations.jl") include("sum_of_arrays.jl") - -##### -##### Add Dynamic kernels to KA -##### - -using KernelAbstractions -using CUDA: CUDABackend, @cuda - -const KA = KernelAbstractions - -(obj::KA.Kernel)(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) = - obj(args...; ndrange, workgroupsize) - -function (obj::KA.Kernel{CUDABackend})(args...; ndrange=nothing, workgroupsize=nothing, dynamic_launch=false) - backend = KA.backend(obj) - - ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) - # this might not be the final context, since we may tune the workgroupsize - ctx = KA.mkcontext(obj, ndrange, iterspace) - - maxthreads = prod(KA.get(KA.workgroupsize(obj))) - - kernel = if dynamic_launch - @cuda launch=false dynamic=true obj.f(ctx, args...) - else - @cuda launch=false always_inline=backend.always_inline maxthreads=maxthreads obj.f(ctx, args...) - end - - blocks = length(KA.blocks(iterspace)) - threads = length(KA.workitems(iterspace)) - - if blocks == 0 - return nothing - end - - # Launch kernel - kernel(ctx, args...; threads, blocks) - - return nothing -end - - end # module From ee97dde9603fc1e87696e6f0a418fe33efa3ecfa Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:13:24 -0500 Subject: [PATCH 075/138] try it like this maybe? --- .../split_explicit_free_surface_kernels.jl | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 7237052967..36a4320148 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -5,6 +5,7 @@ using Oceananigans.Utils using Oceananigans.AbstractOperations: Δz using Oceananigans.BoundaryConditions using Oceananigans.Operators +using CUDA: cudaconvert using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells @@ -156,10 +157,9 @@ end return nothing end -@kernel function _split_explicit_barotropic_velocity!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², +@kernel function _split_explicit_barotropic_velocity!(averaging_weight, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + η̅, U̅, V̅, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, timestepper) i, j = @index(Global, NTuple) velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², @@ -377,20 +377,27 @@ function iterate_split_explicit!(free_surface, grid, Δt) free_surface_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_free_surface!) barotropic_velocity_kernel! = configured_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!) - @unroll for substep in 1:N - Base.@_inline_meta + η_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², + timestepper) - averaging_weight = weights[substep] + U_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², + U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², + η̅, U̅, V̅, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + timestepper) - free_surface_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², - timestepper) - - barotropic_velocity_kernel!(grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², - U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, - timestepper) + GC.@preserve η_args U_args begin + converted_η_args = map(cudaconvert, η_args) + converted_U_args = map(cudaconvert, U_args) + + @unroll for substep in 1:N + Base.@_inline_meta + + averaging_weight = weights[substep] + + free_surface_kernel!(converted_η_args...) + barotropic_velocity_kernel!(averaging_weight, converted_U_args...) + end end return nothing From 6f5e6b769cadceb3dc29c294b84307187e45837e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:19:29 -0500 Subject: [PATCH 076/138] convert --- .../split_explicit_free_surface_kernels.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 36a4320148..f370628931 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -387,8 +387,13 @@ function iterate_split_explicit!(free_surface, grid, Δt) timestepper) GC.@preserve η_args U_args begin - converted_η_args = map(cudaconvert, η_args) - converted_U_args = map(cudaconvert, U_args) + + # Since we need to perform ~50 time-steps which means + # launching ~100 very small kernels, we are limited by + # latency of argument conversion to GPU-compatible values + # To alleviate that penalty we convert first and then we substep! + converted_η_args = convert_args(arch, η_args) + converted_U_args = convert_args(arch, U_args) @unroll for substep in 1:N Base.@_inline_meta @@ -403,6 +408,10 @@ function iterate_split_explicit!(free_surface, grid, Δt) return nothing end +convert_args(::CPU, arg) = args +convert_args(::GPU, arg) = cudaconvert(arg) +convert_args(::GPU, arg::Tuple) = map(cudaconvert, args) + # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) i, j = @index(Global, NTuple) From acd1a541346ff8affd90d271fe25febb532efcfb Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:25:04 -0500 Subject: [PATCH 077/138] fixxing --- .../split_explicit_free_surface_kernels.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index f370628931..93fe48fc8b 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -410,7 +410,7 @@ end convert_args(::CPU, arg) = args convert_args(::GPU, arg) = cudaconvert(arg) -convert_args(::GPU, arg::Tuple) = map(cudaconvert, args) +convert_args(::GPU, arg::Tuple) = map(cudaconvert, arg) # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) From ca7326891eb8e610311de870b948db981bbd6635 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:28:06 -0500 Subject: [PATCH 078/138] try it now? --- .../split_explicit_free_surface_kernels.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 93fe48fc8b..a0ebe7e5d4 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -395,7 +395,7 @@ function iterate_split_explicit!(free_surface, grid, Δt) converted_η_args = convert_args(arch, η_args) converted_U_args = convert_args(arch, U_args) - @unroll for substep in 1:N + @unroll for substep in 1:Nsubsteps Base.@_inline_meta averaging_weight = weights[substep] From 2d8ae26a36befe74db0e5ea8e4675a97c908477a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:16:39 -0500 Subject: [PATCH 079/138] bugfix --- .../split_explicit_free_surface_kernels.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index a0ebe7e5d4..24ff93f3d4 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -388,10 +388,10 @@ function iterate_split_explicit!(free_surface, grid, Δt) GC.@preserve η_args U_args begin - # Since we need to perform ~50 time-steps which means - # launching ~100 very small kernels, we are limited by - # latency of argument conversion to GPU-compatible values - # To alleviate that penalty we convert first and then we substep! + # We need to perform ~50 time-steps which means + # launching ~100 very small kernels: we are limited by + # latency of argument conversion to GPU-compatible values. + # To alleviate this penalty we convert first and then we substep! converted_η_args = convert_args(arch, η_args) converted_U_args = convert_args(arch, U_args) @@ -412,6 +412,8 @@ convert_args(::CPU, arg) = args convert_args(::GPU, arg) = cudaconvert(arg) convert_args(::GPU, arg::Tuple) = map(cudaconvert, arg) +convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) + # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) i, j = @index(Global, NTuple) From 5341b713f82fc72992f0997fe88b31b9cdefe968 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:49:53 -0500 Subject: [PATCH 080/138] add distributed --- .../split_explicit_free_surface_kernels.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 24ff93f3d4..1c2b85dbe4 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -10,6 +10,7 @@ using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells using Oceananigans.ImmersedBoundaries: active_linear_index_to_tuple, ActiveCellsIBG, ActiveSurfaceIBG +using Oceananigans.DistributedComputations: child_architecture # constants for AB3 time stepping scheme (from https://doi.org/10.1016/j.ocemod.2004.08.002) const β = 0.281105 From 1ce6a5ae0b2deff8d5a250e9613d9a90f8ced517 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 14 Dec 2023 05:12:26 -0500 Subject: [PATCH 081/138] bugfix --- src/ImmersedBoundaries/grid_fitted_bottom.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl index 8bd1d7de48..e693e20e2d 100644 --- a/src/ImmersedBoundaries/grid_fitted_bottom.jl +++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl @@ -102,6 +102,6 @@ function on_architecture(arch, ib::GridFittedBottom{<:Field}) return GridFittedBottom(new_bottom_height, ib.immersed_condition) end -Adapt.adapt_structure(to, ib::GridFittedBottom) = GridFittedBottom(adapt(to, ib.bottom_height.data), +Adapt.adapt_structure(to, ib::GridFittedBottom) = GridFittedBottom(adapt(to, ib.bottom_height), ib.immersed_condition) From 53055d2e644ccb3d4516dc0515da64363b3aa93c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 14 Dec 2023 08:35:07 -0500 Subject: [PATCH 082/138] allow unrolling --- .../split_explicit_free_surface_kernels.jl | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 1c2b85dbe4..b2ecf05d5f 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -310,9 +310,21 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac # reset free surface averages @apply_regionally begin - initialize_free_surface_state!(free_surface.state, free_surface.η, free_surface.settings.timestepper) + settings = free_surface.settings + + initialize_free_surface_state!(free_surface.state, free_surface.η, settings.timestepper) + + Nsubsteps = calculate_substeps(settings.substepping, Δt) + + # barotropic time step as fraction of baroclinic step and averaging weights + fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) + Nsubsteps = length(weights) + + # barotropic time step in seconds + Δτᴮ = fractional_Δt * Δt + # Solve for the free surface at tⁿ⁺¹ - iterate_split_explicit!(free_surface, free_surface_grid, Δt) + iterate_split_explicit!(free_surface, free_surface_grid, Δτᴮ, weights, Val(Nsubsteps)) # Reset eta for the next timestep set!(free_surface.η, free_surface.state.η̅) end @@ -347,7 +359,7 @@ const MINIMUM_SUBSTEPS = 5 const FixedSubstepsSetting{N} = SplitExplicitSettings{<:FixedSubstepNumber{<:Any, <:NTuple{N, <:Any}}} where N const FixedSubstepsSplitExplicit{F} = SplitExplicitFreeSurface{<:Any, <:Any, <:Any, <:Any, <:FixedSubstepsSetting{N}} where N -function iterate_split_explicit!(free_surface, grid, Δt) +function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsubsteps}) where Nsubsteps arch = architecture(grid) η = free_surface.η @@ -355,13 +367,6 @@ function iterate_split_explicit!(free_surface, grid, Δt) auxiliary = free_surface.auxiliary settings = free_surface.settings g = free_surface.gravitational_acceleration - - Nsubsteps = calculate_substeps(settings.substepping, Δt) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - Nsubsteps = length(weights) - - Δτᴮ = fractional_Δt * Δt # barotropic time step in seconds # unpack state quantities, parameters and forcing terms U, V = state.U, state.V From 4185152bece1f847cb2d893d6ab1594d9d9f7e76 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 14 Dec 2023 09:26:50 -0500 Subject: [PATCH 083/138] convert in archs --- src/Architectures.jl | 9 +++++++++ .../split_explicit_free_surface_kernels.jl | 8 +------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 54d7a95fc1..ec9c6b319b 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -113,4 +113,13 @@ end @inline unsafe_free!(a::CuArray) = CUDA.unsafe_free!(a) @inline unsafe_free!(a) = nothing +# Convert arguments to GPU-compatible types + +@inline convert_args(::CPU, arg) = args +@inline convert_args(::GPU, arg) = CUDA.cudaconvert(arg) +@inline convert_args(::GPU, arg::Tuple) = map(CUDA.cudaconvert, arg) + +@inline convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) + + end # module diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index b2ecf05d5f..3bbc5460c7 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -5,7 +5,7 @@ using Oceananigans.Utils using Oceananigans.AbstractOperations: Δz using Oceananigans.BoundaryConditions using Oceananigans.Operators -using CUDA: cudaconvert +using Oceananigans.Architectures: convert_args using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells @@ -414,12 +414,6 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu return nothing end -convert_args(::CPU, arg) = args -convert_args(::GPU, arg) = cudaconvert(arg) -convert_args(::GPU, arg::Tuple) = map(cudaconvert, arg) - -convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) - # Calculate RHS for the barotopic time step. @kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) i, j = @index(Global, NTuple) From 0aa5b10499d3a34393738ff3ca4b026a49e3fc76 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Thu, 14 Dec 2023 07:29:25 -0800 Subject: [PATCH 084/138] bugfix --- src/Architectures.jl | 2 -- src/DistributedComputations/distributed_architectures.jl | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index ec9c6b319b..0dc61f71ed 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -119,7 +119,5 @@ end @inline convert_args(::GPU, arg) = CUDA.cudaconvert(arg) @inline convert_args(::GPU, arg::Tuple) = map(CUDA.cudaconvert, arg) -@inline convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) - end # module diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 33fcfc3da8..c9046ca834 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -2,7 +2,7 @@ using Oceananigans.Architectures using Oceananigans.Grids: topology, validate_tupled_argument using CUDA: ndevices, device! -import Oceananigans.Architectures: device, cpu_architecture, arch_array, array_type, child_architecture +import Oceananigans.Architectures: device, cpu_architecture, arch_array, array_type, child_architecture, convert_args import Oceananigans.Grids: zeros import Oceananigans.Utils: sync_device!, tupleit @@ -264,6 +264,7 @@ arch_array(arch::Distributed, A) = arch_array(child_architecture(arch), A) zeros(FT, arch::Distributed, N...) = zeros(FT, child_architecture(arch), N...) array_type(arch::Distributed) = array_type(child_architecture(arch)) sync_device!(arch::Distributed) = sync_device!(arch.child_architecture) +convert_args(arch::Distributed, arg) = convert_args(child_architecture(arch), arg) cpu_architecture(arch::DistributedCPU) = arch cpu_architecture(arch::Distributed{A, S}) where {A, S} = From 742837468c4c0c6a164204b25b7d827fcde68fe7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 18 Dec 2023 09:14:08 +0100 Subject: [PATCH 085/138] just for testing --- .../split_explicit_free_surface.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index d8173550da..670c784bba 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -85,6 +85,20 @@ function FreeSurface(free_surface::SplitExplicitFreeSurface, velocities, grid) free_surface.settings) end +function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, + settings = SplitExplicitSettings(eltype(grid); gravitational_acceleration, substeps = 200)) + + if eltype(settings) != eltype(grid) + @warn "Using $(eltype(settings)) settings for the SplitExplicitFreeSurface on a $(eltype(grid)) grid" + end + + η = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) + + return SplitExplicitFreeSurface(η, SplitExplicitState(grid), SplitExplicitAuxiliaryFields(grid), + gravitational_acceleration, settings) +end + """ struct SplitExplicitState From b48d00c7d1a8bab7f7ac475c9dda08c11993c909 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 18 Dec 2023 09:16:55 +0100 Subject: [PATCH 086/138] removed useless particles --- .../inertial_particles.jl | 66 ------------------- 1 file changed, 66 deletions(-) delete mode 100644 validation/lagrangian_particles/inertial_particles.jl diff --git a/validation/lagrangian_particles/inertial_particles.jl b/validation/lagrangian_particles/inertial_particles.jl deleted file mode 100644 index e6bbfbe9d2..0000000000 --- a/validation/lagrangian_particles/inertial_particles.jl +++ /dev/null @@ -1,66 +0,0 @@ -using StructArrays -using Oceananigans -using Oceananigans: architecture -using Oceananigans.Models.LagrangianParticleTracking: AbstractParticle -using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_w_from_continuity! -import Oceananigans.Models.LagrangianParticleTracking: particle_u_velocity, particle_v_velocity, particle_w_velocity - -struct InertialParticle{T} <: AbstractParticle - x :: T - y :: T - z :: T - u :: T - v :: T - w :: T - particle_respose_time :: T -end - -# 10 Particles with different inertia -x = ones(10) -y = ones(10) -z = ones(10) -u = zeros(10) -v = zeros(10) -w = zeros(10) - -particle_respose_time = range(0.1, 1.0, length = 10) - -properties = StructArray{InertialParticle}((x, y, z, u, v, w, particle_respose_time)) -particles = LagrangianParticles(properties) - -grid = RectilinearGrid(size = (50, 50, 50), x = (0, 2), y = (0, 2), z = (0, 2), topology = (Periodic, Periodic, Periodic)) - -u_fluid = XFaceField(grid) -v_fluid = YFaceField(grid) -w_fluid = ZFaceField(grid) - -@inline particles_u_velocity(u_fluid, particle, Δt) = particle.u + Δt / particles.particle_respose_time * (u_fluid - particle.u) -@inline particles_v_velocity(v_fluid, particle, Δt) = particle.v + Δt / particles.particle_respose_time * (v_fluid - particle.v) -@inline particles_w_velocity(w_fluid, particle, Δt) = particle.w + Δt / particles.particle_respose_time * (w_fluid - particle.w) - -set!(u_fluid, (x, y, z) -> rand()) -set!(v_fluid, (x, y, z) -> rand()) - -fill_halo_regions!((u_fluid, v_fluid)) - -compute_w_from_continuity!((; u = u_fluid, v = v_fluid, w = w_fluid), architecture(grid), grid) - -velocities = PrescribedVelocityFields(; u = u_fluid, v = v_fluid, w = w_fluid) - -model = HydrostaticFreeSurfaceModel(; grid, - tracers = (), - buoyancy = nothing, - particles, - velocities) - -simulation = Simulation(model, Δt = 1e-2, stop_time = 10) - -particles_save = [deepcopy(properties)] - -save_particles(sim) = - push!(particles_save, deepcopy(sim.model.particles.properties)) - -simulation.callbacks[:particles] = Callback(save_particles, IterationInterval(10)) - -run!(simulation) - From 4d36cc41bf5baf89fc4ee9cf33503de5dc8ea0c7 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 18 Dec 2023 09:17:37 +0100 Subject: [PATCH 087/138] removed bacthed stuff --- src/Solvers/batched_tridiagonal_solver.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Solvers/batched_tridiagonal_solver.jl b/src/Solvers/batched_tridiagonal_solver.jl index d69ad5ecfc..629638a98a 100644 --- a/src/Solvers/batched_tridiagonal_solver.jl +++ b/src/Solvers/batched_tridiagonal_solver.jl @@ -88,7 +88,7 @@ Reference implementation per Numerical Recipes, Press et al. 1992 (§ 2.4). Note a slightly different notation from Press et al. is used for indexing the off-diagonal elements; see [`BatchedTridiagonalSolver`](@ref). """ -function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...; only_active_cells = nothing) +function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...) launch_config = if solver.tridiagonal_direction isa XDirection :yz @@ -108,8 +108,7 @@ function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...; only_active_ solver.grid, solver.parameters, Tuple(args), - solver.tridiagonal_direction; - only_active_cells) + solver.tridiagonal_direction) return nothing end From 8842d055b2d263a7c72d75d9f0e6bdd7b7068bcc Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 18 Dec 2023 09:20:05 +0100 Subject: [PATCH 088/138] tracer advetion type --- src/Advection/tracer_advection_operators.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index f6bd834af5..82851cd598 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -1,22 +1,27 @@ using Oceananigans.Operators: Vᶜᶜᶜ using Oceananigans.Fields: ZeroField -struct ThreeDimensionalTracerAdvection{N, FT, A, B, C} <: AbstractAdvectionScheme{N, FT} +struct TracerAdvection{N, FT, A, B, C} <: AbstractAdvectionScheme{N, FT} x :: A y :: B z :: C - ThreeDimensionalTracerAdvection{N, FT}(x::A, y::B, z::C) where {N, FT, A, B, C} = new{N, FT, A, B, C}(x, y, z) + TracerAdvection{N, FT}(x::A, y::B, z::C) where {N, FT, A, B, C} = new{N, FT, A, B, C}(x, y, z) end -function ThreeDimensionalTracerAdvection(; x, y, z) +""" + function TracerAdvection(; x, y, z) + +builds a `TracerAdvection` type with different reconstructions in `x`, `y`, and `z` +""" +function TracerAdvection(; x, y, z) Nx = required_halo_size(x) Ny = required_halo_size(y) Nz = required_halo_size(z) FT = eltype(x) - return ThreeDimensionalTracerAdvection{max(Nx, Ny, Nz), FT}(x, y, z) + return TracerAdvection{max(Nx, Ny, Nz), FT}(x, y, z) end @inline _advective_tracer_flux_x(args...) = advective_tracer_flux_x(args...) @@ -51,7 +56,7 @@ which ends up at the location `ccc`. δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection, U.w, c)) end -@inline function div_Uc(i, j, k, grid, advection::ThreeDimensionalTracerAdvection, U, c) +@inline function div_Uc(i, j, k, grid, advection::TracerAdvection, U, c) return 1/Vᶜᶜᶜ(i, j, k, grid) * (δxᶜᵃᵃ(i, j, k, grid, _advective_tracer_flux_x, advection.x, U.u, c) + δyᵃᶜᵃ(i, j, k, grid, _advective_tracer_flux_y, advection.y, U.v, c) + δzᵃᵃᶜ(i, j, k, grid, _advective_tracer_flux_z, advection.z, U.w, c)) From 47ab44b6db63b598d51a7226a7f01d155e52c839 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 8 Jan 2024 12:53:42 -0500 Subject: [PATCH 089/138] bugfix --- .../HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 670c784bba..a88bf409bf 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -95,7 +95,7 @@ function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, η = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) - return SplitExplicitFreeSurface(η, SplitExplicitState(grid), SplitExplicitAuxiliaryFields(grid), + return SplitExplicitFreeSurface(η, SplitExplicitState(grid, settings.timestepper), SplitExplicitAuxiliaryFields(grid), gravitational_acceleration, settings) end From 881bdb596aaaaf7e8bd32d73afcf0872ebfcee38 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 8 Jan 2024 13:47:05 -0500 Subject: [PATCH 090/138] bugfix --- .../split_explicit_free_surface_kernels.jl | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 3bbc5460c7..d0075e5aec 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -308,23 +308,24 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac # Wait for previous set up wait_free_surface_communication!(free_surface, architecture(free_surface_grid)) + # Calculate the substepping parameterers + settings = free_surface.settings + Nsubsteps = calculate_substeps(settings.substepping, Δt) + + # barotropic time step as fraction of baroclinic step and averaging weights + fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) + Nsubsteps = length(weights) + + # barotropic time step in seconds + Δτᴮ = fractional_Δt * Δt + # reset free surface averages @apply_regionally begin - settings = free_surface.settings - initialize_free_surface_state!(free_surface.state, free_surface.η, settings.timestepper) - Nsubsteps = calculate_substeps(settings.substepping, Δt) - - # barotropic time step as fraction of baroclinic step and averaging weights - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) - Nsubsteps = length(weights) - - # barotropic time step in seconds - Δτᴮ = fractional_Δt * Δt - # Solve for the free surface at tⁿ⁺¹ iterate_split_explicit!(free_surface, free_surface_grid, Δτᴮ, weights, Val(Nsubsteps)) + # Reset eta for the next timestep set!(free_surface.η, free_surface.state.η̅) end From f79a056ee9b18f2ca61075a30f5bb199686a1807 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 8 Jan 2024 14:19:01 -0500 Subject: [PATCH 091/138] other bugfix --- src/Architectures.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 0dc61f71ed..be016b3f82 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -115,9 +115,9 @@ end # Convert arguments to GPU-compatible types -@inline convert_args(::CPU, arg) = args -@inline convert_args(::GPU, arg) = CUDA.cudaconvert(arg) -@inline convert_args(::GPU, arg::Tuple) = map(CUDA.cudaconvert, arg) +@inline convert_args(::CPU, args) = args +@inline convert_args(::GPU, args) = CUDA.cudaconvert(args) +@inline convert_args(::GPU, args::Tuple) = map(CUDA.cudaconvert, args) end # module From c3a21a42d183cab09e6f19f15aaaf5867f7dfae6 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:07:13 -0500 Subject: [PATCH 092/138] other small bugfix --- test/test_split_explicit_free_surface_solver.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index ccb8aeb005..6ce237ccd7 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -42,9 +42,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc η₀(x, y, z) = sin(x) set!(η, η₀) - U₀(x, y) = 0 + U₀(x, y, z) = 0 set!(U, U₀) - V₀(x, y) = 0 + V₀(x, y, z) = 0 set!(V, V₀) η̅ .= 0 @@ -85,9 +85,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc # set!(η, f(x,y)) η₀(x, y, z) = sin(x) set!(η, η₀) - U₀(x, y) = 0 + U₀(x, y, z) = 0 set!(U, U₀) - V₀(x, y) = 0 + V₀(x, y, z) = 0 set!(V, V₀) η̅ .= 0 @@ -140,9 +140,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc V_avg = 3 η₀(x, y, z) = η_avg set!(η, η₀) - U₀(x, y) = U_avg + U₀(x, y, z) = U_avg set!(U, U₀) - V₀(x, y) = V_avg + V₀(x, y, z) = V_avg set!(V, V₀) η̅ .= 0 From 782f247fd8b3a0cef6d5e76e642403303aa9c090 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:27:55 -0500 Subject: [PATCH 093/138] first bugfix --- test/test_split_explicit_free_surface_solver.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 6ce237ccd7..84f00003f9 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -246,11 +246,11 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc # ∂ₜₜ(η) = Δη η_exact = cos(ω * T) * (Array(η.data.parent)[2:Nx+1, 2:Ny+1] .- 1) .+ 1 - U₀(x, y) = kx * cos(kx * x) * sin(ky * y) # ∂ₜU = - ∂x(η), since we know η + U₀(x, y, z) = kx * cos(kx * x) * sin(ky * y) # ∂ₜU = - ∂x(η), since we know η set!(U, U₀) U_exact = -(sin(ω * T) * 1 / ω) .* Array(U.data.parent)[2:Nx+1, 2:Ny+1] .+ gu_c * T - V₀(x, y) = ky * sin(kx * x) * cos(ky * y) # ∂ₜV = - ∂y(η), since we know η + V₀(x, y, z) = ky * sin(kx * x) * cos(ky * y) # ∂ₜV = - ∂y(η), since we know η set!(V, V₀) V_exact = -(sin(ω * T) * 1 / ω) .* Array(V.data.parent)[2:Nx+1, 2:Ny+1] .+ gv_c * T From 7b92c64905288b973e7037fb7ffcb0ce56cd5a42 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:47:38 -0500 Subject: [PATCH 094/138] correct error --- .../split_explicit_free_surface.jl | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index a88bf409bf..15f157e848 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -130,7 +130,7 @@ Base.@kwdef struct SplitExplicitState{CC, ACC, FC, AFC, CF, ACF} "The time-filtered barotropic zonal velocity. (`ReducedField` over ``z``)" U̅ :: FC "The time-filtered barotropic meridional velocity. (`ReducedField` over ``z``)" - V̅ :: FC + V̅ :: CF end """ @@ -149,16 +149,16 @@ function SplitExplicitState(grid::AbstractGrid, timestepper) ηᵐ⁻¹ = auxiliary_free_surface_field(grid, timestepper) ηᵐ⁻² = auxiliary_free_surface_field(grid, timestepper) - U = ZFaceField(grid, indices = (:, :, size(grid, 3))) - V = ZFaceField(grid, indices = (:, :, size(grid, 3))) + U = XFaceField(grid, indices = (:, :, size(grid, 3))) + V = YFaceField(grid, indices = (:, :, size(grid, 3))) - Uᵐ⁻¹ = auxiliary_barotropic_velocity_field(grid, timestepper) - Vᵐ⁻¹ = auxiliary_barotropic_velocity_field(grid, timestepper) - Uᵐ⁻² = auxiliary_barotropic_velocity_field(grid, timestepper) - Vᵐ⁻² = auxiliary_barotropic_velocity_field(grid, timestepper) + Uᵐ⁻¹ = auxiliary_barotropic_U_field(grid, timestepper) + Vᵐ⁻¹ = auxiliary_barotropic_V_field(grid, timestepper) + Uᵐ⁻² = auxiliary_barotropic_U_field(grid, timestepper) + Vᵐ⁻² = auxiliary_barotropic_V_field(grid, timestepper) - U̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - V̅ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + U̅ = XFaceField(grid, indices = (:, :, size(grid, 3))) + V̅ = YFaceField(grid, indices = (:, :, size(grid, 3))) return SplitExplicitState(; ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅) end @@ -196,12 +196,12 @@ Return the `SplitExplicitAuxiliaryFields` for `grid`. """ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) - Gᵁ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Gⱽ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Gᵁ = XFaceField(grid, indices = (:, :, size(grid, 3))) + Gⱽ = YFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶠᶜ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶜᶠ = ZFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶜᶜ = ZFaceField(grid, indices = (:, :, size(grid, 3))) + Hᶠᶜ = XFaceField(grid, indices = (:, :, size(grid, 3))) + Hᶜᶠ = YFaceField(grid, indices = (:, :, size(grid, 3))) + Hᶜᶜ = CenterField(grid, indices = (:, :, size(grid, 3))) dz = GridMetricOperation((Face, Center, Center), Δz, grid) Hᶠᶜ .= sum(dz; dims = 3) @@ -238,8 +238,10 @@ struct ForwardBackwardScheme end auxiliary_free_surface_field(grid, ::AdamsBashforth3Scheme) = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) auxiliary_free_surface_field(grid, ::ForwardBackwardScheme) = nothing -auxiliary_barotropic_velocity_field(grid, ::AdamsBashforth3Scheme) = ZFaceField(grid, indices = (:, :, size(grid, 3))) -auxiliary_barotropic_velocity_field(grid, ::ForwardBackwardScheme) = nothing +auxiliary_barotropic_U_field(grid, ::AdamsBashforth3Scheme) = XFaceField(grid, indices = (:, :, size(grid, 3))) +auxiliary_barotropic_U_field(grid, ::ForwardBackwardScheme) = nothing +auxiliary_barotropic_V_field(grid, ::AdamsBashforth3Scheme) = YFaceField(grid, indices = (:, :, size(grid, 3))) +auxiliary_barotropic_V_field(grid, ::ForwardBackwardScheme) = nothing # (p = 2, q = 4, r = 0.18927) minimize dispersion error from Shchepetkin and McWilliams (2005): https://doi.org/10.1016/j.ocemod.2004.08.002 @inline function averaging_shape_function(τ::FT; p = 2, q = 4, r = FT(0.18927)) where FT From 5e6dcb981a0d57f648513a032e6508bdfc0717da Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:54:46 -0500 Subject: [PATCH 095/138] some bugfixes --- ...distributed_split_explicit_free_surface.jl | 19 +---------- .../split_explicit_free_surface.jl | 23 +------------ .../split_explicit_free_surface_kernels.jl | 34 +++++++++++-------- ...ulti_region_split_explicit_free_surface.jl | 13 +------ 4 files changed, 23 insertions(+), 66 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index 6f5067e555..be65080829 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -10,30 +10,13 @@ function SplitExplicitAuxiliaryFields(grid::DistributedGrid) Gᵁ = Field((Face, Center, Nothing), grid) Gⱽ = Field((Center, Face, Nothing), grid) - Hᶠᶜ = Field((Face, Center, Nothing), grid) - Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) - - calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) - calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) - - calculate_column_height!(Hᶜᶜ, (Center, Center, Center)) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) - # In a non-parallel grid we calculate only the interior kernel_size = augmented_kernel_size(grid) kernel_offsets = augmented_kernel_offsets(grid) kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) -end - -"""Integrate z at locations `location` and set! `height`` with the result""" -@inline function calculate_column_height!(height, location) - dz = GridMetricOperation(location, Δz, height.grid) - return sum!(height, dz) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) end @inline function augmented_kernel_size(grid::DistributedGrid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 15f157e848..92edaff25d 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -179,12 +179,6 @@ Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒞𝒞, 𝒦 Gᵁ :: ℱ𝒞 "Vertically-integrated slow barotropic forcing function for `V` (`ReducedField` over ``z``)" Gⱽ :: 𝒞ℱ - "Depth at `(Face, Center)` (`ReducedField` over ``z``)" - Hᶠᶜ :: ℱ𝒞 - "Depth at `(Center, Face)` (`ReducedField` over ``z``)" - Hᶜᶠ :: 𝒞ℱ - "Depth at `(Center, Center)` (`ReducedField` over ``z``)" - Hᶜᶜ :: 𝒞𝒞 "kernel size for barotropic time stepping" kernel_parameters :: 𝒦 end @@ -199,24 +193,9 @@ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) Gᵁ = XFaceField(grid, indices = (:, :, size(grid, 3))) Gⱽ = YFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶠᶜ = XFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶜᶠ = YFaceField(grid, indices = (:, :, size(grid, 3))) - Hᶜᶜ = CenterField(grid, indices = (:, :, size(grid, 3))) - - dz = GridMetricOperation((Face, Center, Center), Δz, grid) - Hᶠᶜ .= sum(dz; dims = 3) - - dz = GridMetricOperation((Center, Face, Center), Δz, grid) - Hᶜᶠ .= sum(dz; dims = 3) - - dz = GridMetricOperation((Center, Center, Center), Δz, grid) - Hᶜᶜ .= sum(dz; dims = 3) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) - kernel_parameters = :xy - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) end """ diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index d0075e5aec..ca98e44030 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -11,6 +11,8 @@ using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells using Oceananigans.ImmersedBoundaries: active_linear_index_to_tuple, ActiveCellsIBG, ActiveSurfaceIBG using Oceananigans.DistributedComputations: child_architecture +using Oceananigans.DistributedComputations: Distributed +using Printf # constants for AB3 time stepping scheme (from https://doi.org/10.1016/j.ocemod.2004.08.002) const β = 0.281105 @@ -136,8 +138,13 @@ end return nothing end -using Oceananigans.DistributedComputations: Distributed -using Printf +# Column height for the split explicit solver +@inline column_heightᶜᶜ(i, j, k, grid) = grid.Lz +@inline column_heightᶜᶜ(i, j, k, grid::ImmersedBoundaryGrid) = min(grid.Lz, @inbounds grid.immersed_boundary.bottom_height[i, j, 1]) + +@inline column_heightᶠᶜ(i, j, grid) = ℑxᶠᵃᵃ(i, j, 1, grid, column_heightᶜᶜ) +@inline column_heightᶜᶠ(i, j, grid) = ℑyᵃᶠᵃ(i, j, 1, grid, column_heightᶜᶜ) + @kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) i, j = @index(Global, NTuple) @@ -160,20 +167,20 @@ end @kernel function _split_explicit_barotropic_velocity!(averaging_weight, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + η̅, U̅, V̅, Gᵁ, Gⱽ, g, timestepper) i, j = @index(Global, NTuple) velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + Gᵁ, Gⱽ, g, timestepper) end @inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + Gᵁ, Gⱽ, g, timestepper) k_top = grid.Nz+1 @@ -184,8 +191,8 @@ end advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, k_top-1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) - V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, k_top-1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) + U[i, j, k_top-1] += Δτ * (- g * column_heightᶠᶜ(i, j, grid) * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * column_heightᶜᶠ(i, j, grid) * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) # time-averaging η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] @@ -261,20 +268,19 @@ function initialize_auxiliary_state!(state, η, timestepper) return nothing end -@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) +@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, grid) i, j, k = @index(Global, NTuple) k_top = grid.Nz+1 @inbounds begin - u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / Hᶠᶜ[i, j, k_top-1] - v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / Hᶜᶠ[i, j, k_top-1] + u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / column_heightᶠᶜ(i, j, grid) + v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / column_heightᶜᶠ(i, j, grid) end end # may need to do Val(Nk) since it may not be known at compile. Also figure out where to put H function barotropic_split_explicit_corrector!(u, v, free_surface, grid) sefs = free_surface.state - Hᶠᶜ, Hᶜᶠ = free_surface.auxiliary.Hᶠᶜ, free_surface.auxiliary.Hᶜᶠ U, V, U̅, V̅ = sefs.U, sefs.V, sefs.U̅, sefs.V̅ arch = architecture(grid) @@ -283,7 +289,7 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid) compute_barotropic_mode!(U, V, grid, u, v) # add in "good" barotropic mode launch!(arch, grid, :xyz, _barotropic_split_explicit_corrector!, - u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) + u, v, U̅, V̅, U, V, grid) return nothing end @@ -375,7 +381,7 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ + Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ timestepper = settings.timestepper @@ -390,7 +396,7 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu U_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, Gᵁ, Gⱽ, g, Hᶠᶜ, Hᶜᶠ, + η̅, U̅, V̅, Gᵁ, Gⱽ, g, timestepper) GC.@preserve η_args U_args begin diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index c645452038..59dd3770b1 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -9,24 +9,13 @@ function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) Gᵁ = Field((Face, Center, Nothing), grid) Gⱽ = Field((Center, Face, Nothing), grid) - Hᶠᶜ = Field((Face, Center, Nothing), grid) - Hᶜᶠ = Field((Center, Face, Nothing), grid) - Hᶜᶜ = Field((Center, Center, Nothing), grid) - - @apply_regionally calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) - @apply_regionally calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) - - @apply_regionally calculate_column_height!(Hᶜᶜ, (Center, Center, Center)) - - fill_halo_regions!((Hᶠᶜ, Hᶜᶠ, Hᶜᶜ)) - # In a non-parallel grid we calculate only the interior @apply_regionally kernel_size = augmented_kernel_size(grid, grid.partition) @apply_regionally kernel_offsets = augmented_kernel_offsets(grid, grid.partition) @apply_regionally kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, Hᶜᶜ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) end @inline function calculate_column_height!(height, location) From 056def573322cac0782d055f56339d73ea98171b Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:59:05 -0500 Subject: [PATCH 096/138] bugfix --- .../HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 92edaff25d..e37e567fb4 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -174,7 +174,7 @@ large (or `:xy` in case of a serial computation), and start computing from $(FIELDS) """ -Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒞𝒞, 𝒦} +Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒦} "Vertically-integrated slow barotropic forcing function for `U` (`ReducedField` over ``z``)" Gᵁ :: ℱ𝒞 "Vertically-integrated slow barotropic forcing function for `V` (`ReducedField` over ``z``)" From 630f0fafab8ab334ed575629ba81db4a95c57412 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:01:07 -0500 Subject: [PATCH 097/138] slightly more optim --- .../distributed_split_explicit_free_surface.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index be65080829..f17331731e 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -7,8 +7,8 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: FreeSurface, SplitExpli function SplitExplicitAuxiliaryFields(grid::DistributedGrid) - Gᵁ = Field((Face, Center, Nothing), grid) - Gⱽ = Field((Center, Face, Nothing), grid) + Gᵁ = XFaceField(grid, indices = (:, :, size(grid, 3))) + Gⱽ = YFaceField(grid, indices = (:, :, size(grid, 3))) # In a non-parallel grid we calculate only the interior kernel_size = augmented_kernel_size(grid) From e70a57d8a5ea5a2fb54473addef4fe9e2086e9ba Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:21:35 -0500 Subject: [PATCH 098/138] simplifying more --- src/ImmersedBoundaries/grid_fitted_bottom.jl | 10 ++++++++++ .../split_explicit_free_surface.jl | 2 +- .../split_explicit_free_surface_kernels.jl | 11 ++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl index e693e20e2d..bea53d46cd 100644 --- a/src/ImmersedBoundaries/grid_fitted_bottom.jl +++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl @@ -72,12 +72,22 @@ Computes ib.bottom_height and wraps in an array. function ImmersedBoundaryGrid(grid, ib::GridFittedBottom) bottom_field = Field{Center, Center, Nothing}(grid) set!(bottom_field, ib.bottom_height) + launch!(architecture(grid), grid, :xy, _limit_bottom_heigth!, bottom_field, grid.Lz) + fill_halo_regions!(bottom_field) new_ib = GridFittedBottom(bottom_field, ib.immersed_condition) TX, TY, TZ = topology(grid) return ImmersedBoundaryGrid{TX, TY, TZ}(grid, new_ib) end +# Make sure that `abs(bottom_height) <= grid.Lz` to constrain the bottom +@kernel function _limit_bottom_heigth!(bottom_field, Lz) + i, j = @index(Global, NTuple) + if abs(bottom_field[i, j, 1]) > Lz + bottom_field[i, j, 1] = sign(bottom_field[i, j, 1]) * Lz + end +end + @inline function _immersed_cell(i, j, k, underlying_grid, ib::GridFittedBottom{<:Any, <:InterfaceImmersedCondition}) z = znode(i, j, k+1, underlying_grid, c, c, f) h = @inbounds ib.bottom_height[i, j, 1] diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index e37e567fb4..4132154897 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -68,7 +68,7 @@ Keyword Arguments is chosen. - `timestepper`: Time stepping scheme used for the barotropic advancement. Choose one of: - - `ForwardBackwardScheme()` (default): `η = f(U)` then `U = f(η)`, + - `ForwardBackwardScheme()` (default): `ηᵐ⁺¹ = f(U)` then `U = f(η)`, - `AdamsBashforth3Scheme()`: `η = f(U, Uᵐ⁻¹, Uᵐ⁻²)` then `U = f(η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²)`. """ SplitExplicitFreeSurface(FT::DataType = Float64; gravitational_acceleration = g_Earth, kwargs...) = diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index ca98e44030..c9910297b6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -6,7 +6,7 @@ using Oceananigans.AbstractOperations: Δz using Oceananigans.BoundaryConditions using Oceananigans.Operators using Oceananigans.Architectures: convert_args -using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node +using Oceananigans.ImmersedBoundaries: peripheral_node, immersed_inactive_node, GFBIBG using Oceananigans.ImmersedBoundaries: inactive_node, IBG, c, f, SurfaceMap using Oceananigans.ImmersedBoundaries: mask_immersed_field!, use_only_active_surface_cells, use_only_active_interior_cells using Oceananigans.ImmersedBoundaries: active_linear_index_to_tuple, ActiveCellsIBG, ActiveSurfaceIBG @@ -139,8 +139,13 @@ end end # Column height for the split explicit solver -@inline column_heightᶜᶜ(i, j, k, grid) = grid.Lz -@inline column_heightᶜᶜ(i, j, k, grid::ImmersedBoundaryGrid) = min(grid.Lz, @inbounds grid.immersed_boundary.bottom_height[i, j, 1]) +@inline column_heightᶜᶜ(i, j, k, grid) = grid.Lz + +# Column height for an GridFitted bottom immersed boundary +@inline function column_heightᶜᶜ(i, j, k, grid::GFBIBG) + bottom = grid.immersed_boundary.bottom_height[i, j, 1] + return ifelse(bottom < 0, - bottom, grid.Lz - bottom) +end @inline column_heightᶠᶜ(i, j, grid) = ℑxᶠᵃᵃ(i, j, 1, grid, column_heightᶜᶜ) @inline column_heightᶜᶠ(i, j, grid) = ℑyᵃᶠᵃ(i, j, 1, grid, column_heightᶜᶜ) From c1c310157ee540f2b072a37f4e7b5d34bd5bd8bd Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:46:17 -0500 Subject: [PATCH 099/138] all tests should be ok --- src/ImmersedBoundaries/grid_fitted_bottom.jl | 8 +- ...distributed_split_explicit_free_surface.jl | 6 +- .../split_explicit_free_surface.jl | 22 +++-- ...ulti_region_split_explicit_free_surface.jl | 6 +- ...test_split_explicit_free_surface_solver.jl | 80 +++++-------------- .../test_split_explicit_vertical_integrals.jl | 27 +++---- 6 files changed, 59 insertions(+), 90 deletions(-) diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl index bea53d46cd..f1ca0f1e4b 100644 --- a/src/ImmersedBoundaries/grid_fitted_bottom.jl +++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl @@ -72,8 +72,10 @@ Computes ib.bottom_height and wraps in an array. function ImmersedBoundaryGrid(grid, ib::GridFittedBottom) bottom_field = Field{Center, Center, Nothing}(grid) set!(bottom_field, ib.bottom_height) - launch!(architecture(grid), grid, :xy, _limit_bottom_heigth!, bottom_field, grid.Lz) - + + # Make sure that `abs(bottom_height) <= grid.Lz` to constrain the bottom + @apply_regionally launch!(architecture(grid), grid, :xy, _limit_bottom_height!, bottom_field, grid.Lz) + fill_halo_regions!(bottom_field) new_ib = GridFittedBottom(bottom_field, ib.immersed_condition) TX, TY, TZ = topology(grid) @@ -81,7 +83,7 @@ function ImmersedBoundaryGrid(grid, ib::GridFittedBottom) end # Make sure that `abs(bottom_height) <= grid.Lz` to constrain the bottom -@kernel function _limit_bottom_heigth!(bottom_field, Lz) +@kernel function _limit_bottom_height!(bottom_field, Lz) i, j = @index(Global, NTuple) if abs(bottom_field[i, j, 1]) > Lz bottom_field[i, j, 1] = sign(bottom_field[i, j, 1]) * Lz diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index f17331731e..d7f0fe42c5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -7,8 +7,10 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: FreeSurface, SplitExpli function SplitExplicitAuxiliaryFields(grid::DistributedGrid) - Gᵁ = XFaceField(grid, indices = (:, :, size(grid, 3))) - Gⱽ = YFaceField(grid, indices = (:, :, size(grid, 3))) + Nz = size(grid, 3) + + Gᵁ = XFaceField(grid, indices = (:, :, Nz)) + Gⱽ = YFaceField(grid, indices = (:, :, Nz)) # In a non-parallel grid we calculate only the interior kernel_size = augmented_kernel_size(grid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 4132154897..3d88d92c18 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -92,7 +92,8 @@ function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, @warn "Using $(eltype(settings)) settings for the SplitExplicitFreeSurface on a $(eltype(grid)) grid" end - η = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + Nz = size(grid, 3) + η = ZFaceField(grid, indices = (:, :, Nz+1)) gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) return SplitExplicitFreeSurface(η, SplitExplicitState(grid, settings.timestepper), SplitExplicitAuxiliaryFields(grid), @@ -143,22 +144,25 @@ acts as a filter for `η`. Values with superscripts `m-1` and `m-2` correspond t time steps to allow using a higher-order time stepping scheme, e.g., `AdamsBashforth3Scheme`. """ function SplitExplicitState(grid::AbstractGrid, timestepper) - η̅ = ZFaceField(grid, indices = (:, :, size(grid, 3)+1)) + + Nz = size(grid, 3) + + η̅ = ZFaceField(grid, indices = (:, :, Nz+1)) ηᵐ = auxiliary_free_surface_field(grid, timestepper) ηᵐ⁻¹ = auxiliary_free_surface_field(grid, timestepper) ηᵐ⁻² = auxiliary_free_surface_field(grid, timestepper) - U = XFaceField(grid, indices = (:, :, size(grid, 3))) - V = YFaceField(grid, indices = (:, :, size(grid, 3))) + U = XFaceField(grid, indices = (:, :, Nz)) + V = YFaceField(grid, indices = (:, :, Nz)) Uᵐ⁻¹ = auxiliary_barotropic_U_field(grid, timestepper) Vᵐ⁻¹ = auxiliary_barotropic_V_field(grid, timestepper) Uᵐ⁻² = auxiliary_barotropic_U_field(grid, timestepper) Vᵐ⁻² = auxiliary_barotropic_V_field(grid, timestepper) - U̅ = XFaceField(grid, indices = (:, :, size(grid, 3))) - V̅ = YFaceField(grid, indices = (:, :, size(grid, 3))) + U̅ = XFaceField(grid, indices = (:, :, Nz)) + V̅ = YFaceField(grid, indices = (:, :, Nz)) return SplitExplicitState(; ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅) end @@ -190,8 +194,10 @@ Return the `SplitExplicitAuxiliaryFields` for `grid`. """ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) - Gᵁ = XFaceField(grid, indices = (:, :, size(grid, 3))) - Gⱽ = YFaceField(grid, indices = (:, :, size(grid, 3))) + Nz = size(grid, 3) + + Gᵁ = XFaceField(grid, indices = (:, :, Nz)) + Gⱽ = YFaceField(grid, indices = (:, :, Nz)) kernel_parameters = :xy diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index 59dd3770b1..3e3c4ec842 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -6,8 +6,10 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: FreeSurface, SplitExpli function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) - Gᵁ = Field((Face, Center, Nothing), grid) - Gⱽ = Field((Center, Face, Nothing), grid) + Nz = size(grid, 3) + + Gᵁ = XFaceField(grid; indices = (:, :, Nz)) + Gⱽ = YFaceField(grid; indices = (:, :, Nz)) # In a non-parallel grid we calculate only the interior @apply_regionally kernel_size = augmented_kernel_size(grid, grid.partition) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 84f00003f9..4302a182c7 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -1,7 +1,7 @@ include("dependencies_for_runtests.jl") using Oceananigans.Models.HydrostaticFreeSurfaceModels import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings, split_explicit_free_surface_substep! +import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings, iterate_split_explicit! using Oceananigans.Models.HydrostaticFreeSurfaceModels: constant_averaging_kernel using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calculate_adaptive_settings @@ -12,51 +12,35 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc for arch in archs topology = (Periodic, Periodic, Bounded) - Nx, Ny, Nz = 128, 64, 16 - Lx = Ly = Lz = 2π + Nx, Ny, Nz = 128, 1, 1 + Lx = Ly = 2π + Lz = 1 / Oceananigans.BuoyancyModels.g_Earth grid = RectilinearGrid(arch, FT; topology, size = (Nx, Ny, Nz), x = (0, Lx), y = (0, Ly), z = (-Lz, 0), halo=(1, 1, 1)) - settings = SplitExplicitSettings(; substeps = 200, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(eltype(grid); substeps = 200, averaging_kernel = constant_averaging_kernel) sefs = SplitExplicitFreeSurface(grid; settings) sefs.η .= 0 @testset " One timestep test " begin state = sefs.state - auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ - g = sefs.gravitational_acceleration - Hᶠᶜ .= 1 / g - Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 Δτ = 1.0 η₀(x, y, z) = sin(x) set!(η, η₀) - U₀(x, y, z) = 0 - set!(U, U₀) - V₀(x, y, z) = 0 - set!(V, V₀) - - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 - Gᵁ .= 0 - Gⱽ .= 0 - - Nsubsteps = calculate_substeps(settings.substepping, 1) + + Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, 1) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) + U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] U_exact = (reshape(-cos.(grid.xᶠᵃᵃ), (length(grid.xᶜᵃᵃ), 1)).+reshape(0 * grid.yᵃᶜᵃ, (1, length(grid.yᵃᶜᵃ))))[2:Nx+1, 2:Ny+1] @@ -68,19 +52,14 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g - sefs.auxiliary.Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 - T = 2π - Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy + T = 2π + Δτ = 2π / maximum([Nx, Ny]) * 5e-1 # the last factor is essentially the order of accuracy Nt = floor(Int, T / Δτ) Δτ_end = T - Nt * Δτ - settings = SplitExplicitSettings(; substeps = Nt, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(eltype(grid); substeps = Nt, averaging_kernel = constant_averaging_kernel) # set!(η, f(x,y)) η₀(x, y, z) = sin(x) @@ -90,22 +69,17 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc V₀(x, y, z) = 0 set!(V, V₀) - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 + η̅ .= 0 + U̅ .= 0 + V̅ .= 0 Gᵁ .= 0 Gⱽ .= 0 Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - for i in 1:Nt - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, settings, weights, arch, grid, g, Δτ, i) - end - - # + correction for exact time - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, settings, weights, arch, grid, g, Δτ_end, 1) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) + U_computed = Array(parent(U))[2:Nx+1, 2:Ny+1] η_computed = Array(parent(η))[2:Nx+1, 2:Ny+1] set!(η, η₀) @@ -126,13 +100,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g - sefs.auxiliary.Hᶜᶠ .= 1 / g η = sefs.η - velocity_weight = 0.0 - free_surface_weight = 0.0 - Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy + Δτ = 2π / maximum([Nx, Ny]) * 5e-1 # the last factor is essentially the order of accuracy # set!(η, f(x, y)) η_avg = 1 @@ -155,9 +125,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - for i in 1:Nsubsteps - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, i) - end + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] @@ -197,8 +165,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ η = sefs.η g = sefs.gravitational_acceleration - sefs.auxiliary.Hᶠᶜ .= 1 / g # to make life easy - sefs.auxiliary.Hᶜᶠ .= 1 / g # to make life easy # set!(η, f(x,y)) k² = ω² gu_c = 1 @@ -221,12 +187,8 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - for i in 1:Nt - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ, i) - end - # + correction for exact time - split_explicit_free_surface_substep!(η, sefs.state, sefs.auxiliary, sefs.settings, weights, arch, grid, g, Δτ_end, Nt + 1) + + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) η_mean_after = mean(Array(interior(η))) diff --git a/test/test_split_explicit_vertical_integrals.jl b/test/test_split_explicit_vertical_integrals.jl index aad8fbd8d6..6016053926 100644 --- a/test/test_split_explicit_vertical_integrals.jl +++ b/test/test_split_explicit_vertical_integrals.jl @@ -1,8 +1,8 @@ include("dependencies_for_runtests.jl") using Oceananigans.Models.HydrostaticFreeSurfaceModels -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface -import Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings -import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode!, barotropic_split_explicit_corrector!, initialize_free_surface_state! +using Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitFreeSurface +using Oceananigans.Models.HydrostaticFreeSurfaceModels: SplitExplicitState, SplitExplicitAuxiliaryFields, SplitExplicitSettings +using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode!, barotropic_split_explicit_corrector!, initialize_free_surface_state! @testset "Barotropic Kernels" begin @@ -17,7 +17,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode grid = RectilinearGrid(arch, topology = topology, size = (Nx, Ny, Nz), x = (0, Lx), y = (0, Ly), z = (-Lz, 0)) tmp = SplitExplicitFreeSurface(; substeps = 200) - sefs = SplitExplicitState(grid) + sefs = SplitExplicitState(grid, tmp.settings.timestepper) sefs = SplitExplicitAuxiliaryFields(grid) sefs = SplitExplicitFreeSurface(grid) @@ -25,7 +25,6 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ u = Field{Face,Center,Center}(grid) v = Field{Center,Face,Center}(grid) @@ -34,7 +33,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode # set equal to something else η̅ .= U̅ .= V̅ .= 1.0 # now set equal to zero - initialize_free_surface_state!(sefs.state, sefs.η) + initialize_free_surface_state!(sefs.state, sefs.η, sefs.settings.timestepper) # don't forget the ghost points fill_halo_regions!(η̅) fill_halo_regions!(U̅) @@ -51,7 +50,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode Δz .= grid.Δzᵃᵃᶠ set_u_check(x, y, z) = cos((π / 2) * z / Lz) - set_U_check(x, y) = (sin(0) - (-2 * Lz / (π))) + set_U_check(x, y, z) = (sin(0) - (-2 * Lz / (π))) set!(u, set_u_check) exact_U = similar(U) set!(exact_U, set_U_check) @@ -60,7 +59,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all((Array(interior(U) .- interior(exact_U))) .< tolerance) set_v_check(x, y, z) = sin(x * y) * cos((π / 2) * z / Lz) - set_V_check(x, y) = sin(x * y) * (sin(0) - (-2 * Lz / (π))) + set_V_check(x, y, z) = sin(x * y) * (sin(0) - (-2 * Lz / (π))) set!(v, set_v_check) exact_V = similar(V) set!(exact_V, set_V_check) @@ -83,7 +82,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all(Array(interior(U)) .≈ Lz) set_u_check(x, y, z) = sin(x) - set_U_check(x, y) = sin(x) * Lz + set_U_check(x, y, z) = sin(x) * Lz set!(u, set_u_check) exact_U = similar(U) set!(exact_U, set_U_check) @@ -91,7 +90,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode @test all(Array(interior(U)) .≈ Array(interior(exact_U))) set_v_check(x, y, z) = sin(x) * z * cos(y) - set_V_check(x, y) = -sin(x) * Lz^2 / 2.0 * cos(y) + set_V_check(x, y, z) = -sin(x) * Lz^2 / 2.0 * cos(y) set!(v, set_v_check) exact_V = similar(V) set!(exact_V, set_V_check) @@ -116,7 +115,6 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode auxiliary = sefs.auxiliary U, V, η̅, U̅, V̅ = state.U, state.V, state.η̅, state.U̅, state.V̅ Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - Hᶠᶜ, Hᶜᶠ = sefs.auxiliary.Hᶠᶜ, sefs.auxiliary.Hᶜᶠ u = Field{Face,Center,Center}(grid) v = Field{Center,Face,Center}(grid) @@ -124,22 +122,19 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_barotropic_mode v_corrected = similar(v) set_u(x, y, z) = z + Lz / 2 + sin(x) - set_U̅(x, y) = cos(x) * Lz + set_U̅(x, y, z) = cos(x) * Lz set_u_corrected(x, y, z) = z + Lz / 2 + cos(x) set!(u, set_u) set!(U̅, set_U̅) set!(u_corrected, set_u_corrected) set_v(x, y, z) = (z + Lz / 2) * sin(y) + sin(x) - set_V̅(x, y) = (cos(x) + x) * Lz + set_V̅(x, y, z) = (cos(x) + x) * Lz set_v_corrected(x, y, z) = (z + Lz / 2) * sin(y) + cos(x) + x set!(v, set_v) set!(V̅, set_V̅) set!(v_corrected, set_v_corrected) - sefs.auxiliary.Hᶠᶜ .= Lz - sefs.auxiliary.Hᶜᶠ .= Lz - Δz = zeros(Nz) Δz .= grid.Δzᵃᵃᶜ From ff66175554ed89c560caeb0864ff50d3922463d3 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 21:14:47 -0500 Subject: [PATCH 100/138] try it --- test/test_split_explicit_free_surface_solver.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 4302a182c7..756e03068c 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -182,7 +182,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ .= gu_c Gⱽ .= gv_c - settings = SplitExplicitSettings(substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(eltype(grid); substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) Nsubsteps = calculate_substeps(settings.substepping, 1) From 469224baa667d49731f02c006ea46af122e57914 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 21:16:00 -0500 Subject: [PATCH 101/138] correct for last time --- test/test_split_explicit_free_surface_solver.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 756e03068c..de7b80f251 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -185,10 +185,8 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc settings = SplitExplicitSettings(eltype(grid); substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) - Nsubsteps = calculate_substeps(settings.substepping, 1) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nt)) + iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) η_mean_after = mean(Array(interior(η))) From d09e5fe30b2aa6d30dd4400f184ed1a57f87e25a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:15:58 -0500 Subject: [PATCH 102/138] try again --- test/test_split_explicit_free_surface_solver.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index de7b80f251..640e571624 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -184,6 +184,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc settings = SplitExplicitSettings(eltype(grid); substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) + + Nsubsteps = calculate_substeps(settings.substepping, 1) + fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nt)) iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) From 73f8b09c746cae37c5dc034c11c85376af534365 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 9 Jan 2024 23:11:33 -0500 Subject: [PATCH 103/138] fixed --- .../split_explicit_free_surface.jl | 4 --- ...test_split_explicit_free_surface_solver.jl | 34 +++++++++---------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 3d88d92c18..90c7f1907f 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -88,10 +88,6 @@ end function SplitExplicitFreeSurface(grid; gravitational_acceleration = g_Earth, settings = SplitExplicitSettings(eltype(grid); gravitational_acceleration, substeps = 200)) - if eltype(settings) != eltype(grid) - @warn "Using $(eltype(settings)) settings for the SplitExplicitFreeSurface on a $(eltype(grid)) grid" - end - Nz = size(grid, 3) η = ZFaceField(grid, indices = (:, :, Nz+1)) gravitational_acceleration = convert(eltype(grid), gravitational_acceleration) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 640e571624..d2ca748c66 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -12,7 +12,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc for arch in archs topology = (Periodic, Periodic, Bounded) - Nx, Ny, Nz = 128, 1, 1 + Nx, Ny, Nz = 128, 64, 1 Lx = Ly = 2π Lz = 1 / Oceananigans.BuoyancyModels.g_Earth @@ -184,11 +184,11 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc settings = SplitExplicitSettings(eltype(grid); substeps = Nt + 1, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) - - Nsubsteps = calculate_substeps(settings.substepping, 1) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nt)) + weights = settings.substepping.averaging_weights + for i in 1:Nt + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) + end iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) η_mean_after = mean(Array(interior(η))) @@ -196,30 +196,30 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc tolerance = 10eps(FT) @test abs(η_mean_after - η_mean_before) < tolerance - η_computed = Array(η.data.parent)[2:Nx+1, 2:Ny+1] - U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] - V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] + η_computed = Array(deepcopy(interior(η, :, 1, 1))) + U_computed = Array(deepcopy(interior(U, :, 1, 1))) + V_computed = Array(deepcopy(interior(V, :, 1, 1))) - η̅_computed = Array(η̅.data.parent)[2:Nx+1, 2:Ny+1] - U̅_computed = Array(U̅.data.parent)[2:Nx+1, 2:Ny+1] - V̅_computed = Array(V̅.data.parent)[2:Nx+1, 2:Ny+1] + η̅_computed = Array(deepcopy(interior(η̅, :, 1, 1))) + U̅_computed = Array(deepcopy(interior(U̅, :, 1, 1))) + V̅_computed = Array(deepcopy(interior(V̅, :, 1, 1))) set!(η, η₀) # ∂ₜₜ(η) = Δη - η_exact = cos(ω * T) * (Array(η.data.parent)[2:Nx+1, 2:Ny+1] .- 1) .+ 1 + η_exact = cos(ω * T) * (Array(interior(η, :, 1, 1)) .- 1) .+ 1 U₀(x, y, z) = kx * cos(kx * x) * sin(ky * y) # ∂ₜU = - ∂x(η), since we know η set!(U, U₀) - U_exact = -(sin(ω * T) * 1 / ω) .* Array(U.data.parent)[2:Nx+1, 2:Ny+1] .+ gu_c * T + U_exact = -(sin(ω * T) * 1 / ω) .* Array(interior(U, :, 1, 1)) .+ gu_c * T V₀(x, y, z) = ky * sin(kx * x) * cos(ky * y) # ∂ₜV = - ∂y(η), since we know η set!(V, V₀) - V_exact = -(sin(ω * T) * 1 / ω) .* Array(V.data.parent)[2:Nx+1, 2:Ny+1] .+ gv_c * T + V_exact = -(sin(ω * T) * 1 / ω) .* Array(interior(V, :, 1, 1)) .+ gv_c * T - η̅_exact = (sin(ω * T) / ω - sin(ω * 0) / ω) / T * (Array(η.data.parent)[2:Nx+1, 2:Ny+1] .- 1) .+ 1 - U̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(U.data.parent)[2:Nx+1, 2:Ny+1] .+ gu_c * T / 2 - V̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(V.data.parent)[2:Nx+1, 2:Ny+1] .+ gv_c * T / 2 + η̅_exact = (sin(ω * T) / ω - sin(ω * 0) / ω) / T * (Array(interior(η, :, 1, 1)) .- 1) .+ 1 + U̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(interior(U, :, 1, 1)) .+ gu_c * T / 2 + V̅_exact = (cos(ω * T) * 1 / ω^2 - cos(ω * 0) * 1 / ω^2) / T * Array(interior(V, :, 1, 1)) .+ gv_c * T / 2 tolerance = 1e-2 From 69b9b98353dd3a34457bb311ae1fdeaeb8d7dc50 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 10 Jan 2024 00:11:59 -0500 Subject: [PATCH 104/138] tests fixxed --- test/test_split_explicit_free_surface_solver.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index d2ca748c66..f30bd5769b 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -77,8 +77,11 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - - iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) + + for i in 1:Nt + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) + end + iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) U_computed = Array(parent(U))[2:Nx+1, 2:Ny+1] η_computed = Array(parent(η))[2:Nx+1, 2:Ny+1] From b42b115f603df2a1faf6c718b63f9d72ca4a0e86 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 10 Jan 2024 10:22:26 -0500 Subject: [PATCH 105/138] finally tests fixed --- test/test_split_explicit_free_surface_solver.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index f30bd5769b..a0d0aef3e6 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -55,11 +55,12 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc η = sefs.η T = 2π - Δτ = 2π / maximum([Nx, Ny]) * 5e-1 # the last factor is essentially the order of accuracy + Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy Nt = floor(Int, T / Δτ) Δτ_end = T - Nt * Δτ settings = SplitExplicitSettings(eltype(grid); substeps = Nt, averaging_kernel = constant_averaging_kernel) + sefs = sefs(settings) # set!(η, f(x,y)) η₀(x, y, z) = sin(x) @@ -75,20 +76,19 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Gᵁ .= 0 Gⱽ .= 0 - Nsubsteps = calculate_substeps(settings.substepping, 1) - fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights + weights = settings.substepping.averaging_weights for i in 1:Nt iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) end iterate_split_explicit!(sefs, grid, Δτ_end, weights, Val(1)) - U_computed = Array(parent(U))[2:Nx+1, 2:Ny+1] - η_computed = Array(parent(η))[2:Nx+1, 2:Ny+1] + U_computed = Array(deepcopy(interior(U))) + η_computed = Array(deepcopy(interior(η))) set!(η, η₀) set!(U, U₀) - U_exact = Array(parent(U))[2:Nx+1, 2:Ny+1] - η_exact = Array(parent(η))[2:Nx+1, 2:Ny+1] + U_exact = Array(deepcopy(interior(U))) + η_exact = Array(deepcopy(interior(η))) @test maximum(abs.(U_computed - U_exact)) < 1e-3 @show maximum(abs.(η_computed)) From dcffb79f7859a7b087b6caa2e877aadf262a263a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 10 Jan 2024 11:33:48 -0500 Subject: [PATCH 106/138] back to previous dt --- test/test_split_explicit_free_surface_solver.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index a0d0aef3e6..ffb5b7c34b 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -105,7 +105,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc g = sefs.gravitational_acceleration η = sefs.η - Δτ = 2π / maximum([Nx, Ny]) * 5e-1 # the last factor is essentially the order of accuracy + Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy # set!(η, f(x, y)) η_avg = 1 @@ -127,7 +127,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] From 9801ec04a6f0f41c0f91c1f059d923f03f6e9c70 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 12 Jan 2024 08:49:28 -0500 Subject: [PATCH 107/138] bugfix --- src/ImmersedBoundaries/active_cells_map.jl | 2 +- src/Utils/kernel_launching.jl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 546902792d..5b255122a8 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -181,7 +181,7 @@ function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any ox = Rx == 1 || Tx == RightConnected ? 0 : Hx oy = Ry == 1 || Ty == RightConnected ? 0 : Hy - interior = active_interior_indices(ibg; parameters = KernelParameters((nx, ny, Nz), (ox, oy, 0))) + interior = active_interior_indices(ibg; parameters = KernelParameters((nx, ny, Nz), (ox, oy, 0))) return (; interior, west, east, south, north) end diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index f43be9a3d2..b0c6bf796c 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -128,7 +128,8 @@ function launch!(arch, grid, workspec, kernel!, kernel_args...; only_active_cells, kwargs...) - loop!(kernel_args...) + + !isnothing(loop!) && loop!(kernel_args...) return nothing end @@ -145,6 +146,7 @@ function configured_kernel(arch, grid, workspec, kernel!; reduced_dimensions, location) + offset = offsets(workspec) if !isnothing(only_active_cells) From c64f404210bf48f40dff19cb023d6a4af795475c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 15 Jan 2024 10:28:04 -0500 Subject: [PATCH 108/138] tests fixed? --- test/test_split_explicit_free_surface_solver.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index ffb5b7c34b..640d0596c6 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -128,7 +128,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nsubsteps = calculate_substeps(settings.substepping, 1) fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights - iterate_split_explicit!(sefs, grid, Δτ, weights, Val(Nsubsteps)) + for step in 1:Nsubsteps + iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) + end U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] From 359a0832e8f59340b16289a73ed923b461bb15e5 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 15 Jan 2024 14:52:37 -0500 Subject: [PATCH 109/138] ale --- ...test_split_explicit_free_surface_solver.jl | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl index 640d0596c6..b4e7a53b50 100644 --- a/test/test_split_explicit_free_surface_solver.jl +++ b/test/test_split_explicit_free_surface_solver.jl @@ -59,7 +59,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc Nt = floor(Int, T / Δτ) Δτ_end = T - Nt * Δτ - settings = SplitExplicitSettings(eltype(grid); substeps = Nt, averaging_kernel = constant_averaging_kernel) + settings = SplitExplicitSettings(; substeps = Nt, averaging_kernel = constant_averaging_kernel) sefs = sefs(settings) # set!(η, f(x,y)) @@ -96,6 +96,11 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc @test maximum(abs.(η_computed - η_exact)) < max(100eps(FT), 1e-6) end + settings = SplitExplicitSettings(eltype(grid); substeps = 200, averaging_kernel = constant_averaging_kernel) + sefs = SplitExplicitFreeSurface(grid; settings) + + sefs.η .= 0 + @testset "Averaging / Do Nothing test " begin state = sefs.state auxiliary = sefs.auxiliary @@ -105,24 +110,22 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc g = sefs.gravitational_acceleration η = sefs.η - Δτ = 2π / maximum([Nx, Ny]) * 5e-2 # the last factor is essentially the order of accuracy + Δτ = 2π / maximum([Nx, Ny]) * 1e-2 # the last factor is essentially the order of accuracy # set!(η, f(x, y)) η_avg = 1 U_avg = 2 V_avg = 3 - η₀(x, y, z) = η_avg - set!(η, η₀) - U₀(x, y, z) = U_avg - set!(U, U₀) - V₀(x, y, z) = V_avg - set!(V, V₀) + fill!(η, η_avg) + fill!(U, U_avg) + fill!(V, V_avg) + + fill!(η̅ , 0) + fill!(U̅ , 0) + fill!(V̅ , 0) + fill!(Gᵁ, 0) + fill!(Gⱽ, 0) - η̅ .= 0 - U̅ .= 0 - V̅ .= 0 - Gᵁ .= 0 - Gⱽ .= 0 settings = sefs.settings Nsubsteps = calculate_substeps(settings.substepping, 1) @@ -132,13 +135,13 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: calculate_substeps, calc iterate_split_explicit!(sefs, grid, Δτ, weights, Val(1)) end - U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1] - V_computed = Array(V.data.parent)[2:Nx+1, 2:Ny+1] - η_computed = Array(η.data.parent)[2:Nx+1, 2:Ny+1] + U_computed = Array(deepcopy(interior(U))) + V_computed = Array(deepcopy(interior(V))) + η_computed = Array(deepcopy(interior(η))) - U̅_computed = Array(U̅.data.parent)[2:Nx+1, 2:Ny+1] - V̅_computed = Array(V̅.data.parent)[2:Nx+1, 2:Ny+1] - η̅_computed = Array(η̅.data.parent)[2:Nx+1, 2:Ny+1] + U̅_computed = Array(deepcopy(interior(U̅))) + V̅_computed = Array(deepcopy(interior(V̅))) + η̅_computed = Array(deepcopy(interior(η̅))) tolerance = 100eps(FT) From 72f286ef277b891436dfd230fc8b798d24a4816f Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 16 Jan 2024 09:21:12 -0500 Subject: [PATCH 110/138] Update src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl Co-authored-by: Navid C. Constantinou --- .../update_hydrostatic_free_surface_model_state.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 43f53a766f..4c082c31b5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -37,7 +37,6 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp @apply_regionally update_model_field_time_series!(model, model.clock) fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) - @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) From 46ef24cff94d244137c861c4414615e0ef6a9d80 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 16 Jan 2024 09:21:24 -0500 Subject: [PATCH 111/138] Update src/TimeSteppers/quasi_adams_bashforth_2.jl Co-authored-by: Navid C. Constantinou --- src/TimeSteppers/quasi_adams_bashforth_2.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index f63dcf817f..91a01ef7a3 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -159,4 +159,4 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) end -@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing +@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻, grid) = nothing From 6b74e7ae13c2478bfdcb4743b08aeec22d684efb Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:23:10 -0500 Subject: [PATCH 112/138] removed NVTX --- Project.toml | 1 - src/Utils/kernel_launching.jl | 1 - 2 files changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index 8f6c4a2351..2d0350b945 100644 --- a/Project.toml +++ b/Project.toml @@ -22,7 +22,6 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" -NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PencilArrays = "0e08944d-e94e-41b1-9406-dcf66b6a9d2e" diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index b0c6bf796c..2fbf293971 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -7,7 +7,6 @@ using Oceananigans.Utils using Oceananigans.Grids using Oceananigans.Grids: AbstractGrid -using NVTX import Base struct KernelParameters{S, O} end From 2061300159815688e633ca87c1388501e90da431 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:23:54 -0500 Subject: [PATCH 113/138] remove one line --- src/Utils/kernel_launching.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 2fbf293971..d6da72b92f 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -145,7 +145,6 @@ function configured_kernel(arch, grid, workspec, kernel!; reduced_dimensions, location) - offset = offsets(workspec) if !isnothing(only_active_cells) From f46f7a9b85eb719ef384348a706eadfb85ba68e1 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:24:48 -0500 Subject: [PATCH 114/138] if inside --- src/Utils/kernel_launching.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index d6da72b92f..caf1e2c251 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -150,12 +150,13 @@ function configured_kernel(arch, grid, workspec, kernel!; if !isnothing(only_active_cells) workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) offset = nothing - end - if worksize == 0 - return nothing + # A fully immersed domain! + if worksize == 0 + return nothing + end end - + # We can only launch offset kernels with Static sizes!!!! loop! = isnothing(offset) ? kernel!(Architectures.device(arch), workgroup, worksize) : kernel!(Architectures.device(arch), StaticSize(workgroup), OffsetStaticSize(contiguousrange(worksize, offset))) From a276111dab5ce8d8807e07c5b52b341c6bcce8d5 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:25:05 -0500 Subject: [PATCH 115/138] better comment --- src/Utils/kernel_launching.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index caf1e2c251..a6624e191a 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -151,7 +151,7 @@ function configured_kernel(arch, grid, workspec, kernel!; workgroup, worksize = active_cells_work_layout(workgroup, worksize, only_active_cells, grid) offset = nothing - # A fully immersed domain! + # A non active domain! if worksize == 0 return nothing end From 4fedc37d30b072d9b4c602027e0f4964816fb8a4 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 10:12:53 -0500 Subject: [PATCH 116/138] some docstrings --- src/ImmersedBoundaries/active_cells_map.jl | 67 +++++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index 5b255122a8..48b157a436 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -37,23 +37,48 @@ active_map(::Val{:north}) = NorthMap() @inline use_only_active_interior_cells(::ActiveCellsIBG) = InteriorMap() @inline use_only_active_interior_cells(::DistributedActiveCellsIBG) = InteriorMap() -@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ArrayActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) +""" + active_cells_work_layout(group, size, map_type, grid) + +Compute the work layout for active cells based on the given map type and grid. + +# Arguments +- `group`: The previous workgroup. +- `size`: The previous worksize. +- `map_type`: The type of map (e.g., `InteriorMap`, `WestMap`, `EastMap`, `SouthMap`, `NorthMap`). +- `grid`: The grid containing the active cells. + +# Returns +- A tuple `(workgroup, worksize)` representing the work layout for active cells. +""" +@inline active_cells_work_layout(group, size, ::InteriorMap, grid::ArrayActiveCellsIBG) = min(length(grid.interior_active_cells), 256), length(grid.interior_active_cells) @inline active_cells_work_layout(group, size, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.interior), 256), length(grid.interior_active_cells.interior) @inline active_cells_work_layout(group, size, ::WestMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.west), 256), length(grid.interior_active_cells.west) @inline active_cells_work_layout(group, size, ::EastMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.east), 256), length(grid.interior_active_cells.east) @inline active_cells_work_layout(group, size, ::SouthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.south), 256), length(grid.interior_active_cells.south) @inline active_cells_work_layout(group, size, ::NorthMap, grid::NamedTupleActiveCellsIBG) = min(length(grid.interior_active_cells.north), 256), length(grid.interior_active_cells.north) +@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) + +""" + active_linear_index_to_tuple(idx, map, grid) +Converts a linear index to a tuple of indices based on the given map and grid. + +# Arguments +- `idx`: The linear index to convert. +- `map`: The map indicating the type of index conversion to perform. +- `grid`: The grid containing the active cells. + +# Returns +A tuple of indices corresponding to the linear index. +""" @inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::ArrayActiveCellsIBG) = Base.map(Int, grid.interior_active_cells[idx]) @inline active_linear_index_to_tuple(idx, ::InteriorMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.interior[idx]) @inline active_linear_index_to_tuple(idx, ::WestMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.west[idx]) @inline active_linear_index_to_tuple(idx, ::EastMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.east[idx]) @inline active_linear_index_to_tuple(idx, ::SouthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.south[idx]) @inline active_linear_index_to_tuple(idx, ::NorthMap, grid::NamedTupleActiveCellsIBG) = Base.map(Int, grid.interior_active_cells.north[idx]) - -@inline active_cells_work_layout(group, size, ::SurfaceMap, grid::ActiveSurfaceIBG) = min(length(grid.surface_active_cells), 256), length(grid.surface_active_cells) - -@inline active_linear_index_to_tuple(idx, ::SurfaceMap, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) +@inline active_linear_index_to_tuple(idx, ::SurfaceMap, grid::ActiveSurfaceIBG) = Base.map(Int, grid.surface_active_cells[idx]) function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) @@ -62,9 +87,8 @@ function ImmersedBoundaryGrid(grid, ib; active_cells_map::Bool = true) # Create the cells map on the CPU, then switch it to the GPU if active_cells_map - interior_map = active_cells_interior_map(ibg) - surface_map = active_cells_surface_map(ibg) - surface_map = arch_array(architecture(ibg), surface_map) + interior_map = map_interior_active_cells(ibg) + surface_map = map_surface_active_cells(ibg) else interior_map = nothing surface_map = nothing @@ -107,6 +131,18 @@ const MAXUInt8 = 2^8 - 1 const MAXUInt16 = 2^16 - 1 const MAXUInt32 = 2^32 - 1 +""" + active_interior_indices(ibg; parameters = :xyz) + +Compute the indices of the active interior cells in the given immersed boundary grid. + +# Arguments +- `ibg`: The immersed boundary grid. +- `parameters`: (optional) The parameters to be used for computing the active cells. Default is `:xyz`. + +# Returns +An array of tuples representing the indices of the active interior cells. +""" function active_interior_indices(ibg; parameters = :xyz) active_cells_field = compute_interior_active_cells(ibg; parameters) @@ -146,11 +182,11 @@ end @inline add_3rd_index(t::Tuple, k) = (t[1], t[2], k) -active_cells_interior_map(ibg) = active_interior_indices(ibg; parameters = :xyz) +map_interior_active_cells(ibg) = active_interior_indices(ibg; parameters = :xyz) # In case of a `DistributedGrid` we want to have different maps depending on the # partitioning of the domain -function active_cells_interior_map(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) +function map_interior_active_cells(ibg::ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid}) arch = architecture(ibg) Rx, Ry, _ = arch.ranks @@ -188,17 +224,18 @@ end # If we eventually want to perform also barotropic step, `w` computation and `p` # computation only on active `columns` -function active_cells_surface_map(ibg) +function map_surface_active_cells(ibg) active_cells_field = compute_surface_active_cells(ibg) interior_cells = arch_array(CPU(), interior(active_cells_field, :, :, 1)) full_indices = findall(interior_cells) - Nx, Ny, Nz = size(ibg) + Nx, Ny, _ = size(ibg) # Reduce the size of the active_cells_map (originally a tuple of Int64) N = max(Nx, Ny) IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 - smaller_indices = getproperty.(full_indices, Ref(:I)) .|> Tuple{IntType, IntType} - - return smaller_indices + surface_map = getproperty.(full_indices, Ref(:I)) .|> Tuple{IntType, IntType} + surface_map = arch_array(architecture(ibg), surface_map) + + return surface_map end From 8f342fa5a773aa1e64c79d5e5a3a0bc3af00c813 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 16 Jan 2024 11:35:31 -0500 Subject: [PATCH 117/138] remove NVTX --- src/Architectures.jl | 1 - src/DistributedComputations/halo_communication.jl | 2 -- .../update_hydrostatic_free_surface_model_state.jl | 1 - 3 files changed, 4 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index be016b3f82..099769a04b 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -8,7 +8,6 @@ using CUDA using KernelAbstractions using Adapt using OffsetArrays -using NVTX """ AbstractArchitecture diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 5d011acafe..45a520b5ee 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -22,8 +22,6 @@ import Oceananigans.BoundaryConditions: fill_south_and_north_halo!, fill_bottom_and_top_halo! -using NVTX - ##### ##### MPI tags for halo communication BCs ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 4c082c31b5..4cf7c19521 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -12,7 +12,6 @@ import Oceananigans.TimeSteppers: update_state! import Oceananigans.Models.NonhydrostaticModels: compute_auxiliaries! using Oceananigans.Models: update_model_field_time_series! -using NVTX compute_auxiliary_fields!(auxiliary_fields) = Tuple(compute!(a) for a in auxiliary_fields) From b7c871a8d15435dcbf0b363cb3818992cf088b60 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:32:16 -0500 Subject: [PATCH 118/138] test an hypothesis --- ...distributed_split_explicit_free_surface.jl | 16 +++++++- .../split_explicit_free_surface.jl | 17 +++++++- .../split_explicit_free_surface_kernels.jl | 39 +++++++------------ ...ulti_region_split_explicit_free_surface.jl | 10 ++++- 4 files changed, 54 insertions(+), 28 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index d7f0fe42c5..66d30f9422 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -12,13 +12,27 @@ function SplitExplicitAuxiliaryFields(grid::DistributedGrid) Gᵁ = XFaceField(grid, indices = (:, :, Nz)) Gⱽ = YFaceField(grid, indices = (:, :, Nz)) + Hᶠᶜ = Field((Face, Center, Nothing), grid) + Hᶜᶠ = Field((Center, Face, Nothing), grid) + + calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) + calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) + + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) + # In a non-parallel grid we calculate only the interior kernel_size = augmented_kernel_size(grid) kernel_offsets = augmented_kernel_offsets(grid) kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) +end + +"""Integrate z at locations `location` and set! `height`` with the result""" +@inline function calculate_column_height!(height, location) + dz = GridMetricOperation(location, Δz, height.grid) + return sum!(height, dz) end @inline function augmented_kernel_size(grid::DistributedGrid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 90c7f1907f..252b335517 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -179,6 +179,10 @@ Base.@kwdef struct SplitExplicitAuxiliaryFields{𝒞ℱ, ℱ𝒞, 𝒦} Gᵁ :: ℱ𝒞 "Vertically-integrated slow barotropic forcing function for `V` (`ReducedField` over ``z``)" Gⱽ :: 𝒞ℱ + "Depth at `(Face, Center)` (`ReducedField` over ``z``)" + Hᶠᶜ :: ℱ𝒞 + "Depth at `(Center, Face)` (`ReducedField` over ``z``)" + Hᶜᶠ :: 𝒞ℱ "kernel size for barotropic time stepping" kernel_parameters :: 𝒦 end @@ -195,9 +199,20 @@ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) Gᵁ = XFaceField(grid, indices = (:, :, Nz)) Gⱽ = YFaceField(grid, indices = (:, :, Nz)) + Hᶠᶜ = Field((Face, Center, Nothing), grid) + Hᶜᶠ = Field((Center, Face, Nothing), grid) + + dz = GridMetricOperation((Face, Center, Center), Δz, grid) + sum!(Hᶠᶜ, dz) + + dz = GridMetricOperation((Center, Face, Center), Δz, grid) + sum!(Hᶜᶠ, dz) + + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) + kernel_parameters = :xy - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) end """ diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 3bd50d9c34..64a0844ba7 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -138,19 +138,6 @@ end return nothing end -# Column height for the split explicit solver -@inline column_heightᶜᶜ(i, j, k, grid) = grid.Lz - -# Column height for an GridFitted bottom immersed boundary -@inline function column_heightᶜᶜ(i, j, k, grid::GFBIBG) - bottom = grid.immersed_boundary.bottom_height[i, j, 1] - return ifelse(bottom < 0, - bottom, grid.Lz - bottom) -end - -@inline column_heightᶠᶜ(i, j, grid) = ℑxᶠᵃᵃ(i, j, 1, grid, column_heightᶜᶜ) -@inline column_heightᶜᶠ(i, j, grid) = ℑyᵃᶠᵃ(i, j, 1, grid, column_heightᶜᶜ) - - @kernel function _split_explicit_free_surface!(grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) i, j = @index(Global, NTuple) free_surface_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, V, Uᵐ⁻¹, Uᵐ⁻², Vᵐ⁻¹, Vᵐ⁻², timestepper) @@ -172,20 +159,20 @@ end @kernel function _split_explicit_barotropic_velocity!(averaging_weight, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, Gᵁ, Gⱽ, g, + η̅, U̅, V̅, Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, timestepper) i, j = @index(Global, NTuple) velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, timestepper) end @inline function velocity_evolution!(i, j, grid, Δτ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², η̅, U̅, V̅, averaging_weight, - Gᵁ, Gⱽ, g, + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, timestepper) k_top = grid.Nz+1 @@ -196,8 +183,8 @@ end advance_previous_velocity!(i, j, k_top-1, timestepper, V, Vᵐ⁻¹, Vᵐ⁻²) # ∂τ(U) = - ∇η + G - U[i, j, k_top-1] += Δτ * (- g * column_heightᶠᶜ(i, j, grid) * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) - V[i, j, k_top-1] += Δτ * (- g * column_heightᶜᶠ(i, j, grid) * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) + U[i, j, k_top-1] += Δτ * (- g * Hᶠᶜ[i, j, 1] * ∂xᶠᶜᶠ_η(i, j, k_top, grid, TX, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gᵁ[i, j, k_top-1]) + V[i, j, k_top-1] += Δτ * (- g * Hᶜᶠ[i, j, 1] * ∂yᶜᶠᶠ_η(i, j, k_top, grid, TY, η★, timestepper, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻²) + Gⱽ[i, j, k_top-1]) # time-averaging η̅[i, j, k_top] += averaging_weight * η[i, j, k_top] @@ -273,13 +260,13 @@ function initialize_auxiliary_state!(state, η, timestepper) return nothing end -@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, grid) +@kernel function _barotropic_split_explicit_corrector!(u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) i, j, k = @index(Global, NTuple) k_top = grid.Nz+1 @inbounds begin - u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / column_heightᶠᶜ(i, j, grid) - v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / column_heightᶜᶠ(i, j, grid) + u[i, j, k] = u[i, j, k] + (U̅[i, j, k_top-1] - U[i, j, k_top-1]) / Hᶠᶜ[i, j, 1] + v[i, j, k] = v[i, j, k] + (V̅[i, j, k_top-1] - V[i, j, k_top-1]) / Hᶜᶠ[i, j, 1] end end @@ -287,14 +274,16 @@ end function barotropic_split_explicit_corrector!(u, v, free_surface, grid) sefs = free_surface.state U, V, U̅, V̅ = sefs.U, sefs.V, sefs.U̅, sefs.V̅ + Hᶠᶜ, Hᶜᶠ = free_surface.auxiliary.Hᶠᶜ, free_surface.auxiliary.Hᶜᶠ arch = architecture(grid) + # take out "bad" barotropic mode, # !!!! reusing U and V for this storage since last timestep doesn't matter compute_barotropic_mode!(U, V, grid, u, v) # add in "good" barotropic mode launch!(arch, grid, :xyz, _barotropic_split_explicit_corrector!, - u, v, U̅, V̅, U, V, grid) + u, v, U̅, V̅, U, V, Hᶠᶜ, Hᶜᶠ, grid) return nothing end @@ -386,8 +375,8 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu Vᵐ⁻¹, Vᵐ⁻² = state.Vᵐ⁻¹, state.Vᵐ⁻² ηᵐ, ηᵐ⁻¹, ηᵐ⁻² = state.ηᵐ, state.ηᵐ⁻¹, state.ηᵐ⁻² η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅ - Gᵁ, Gⱽ = auxiliary.Gᵁ, auxiliary.Gⱽ - + Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ = auxiliary.Gᵁ, auxiliary.Gⱽ, auxiliary.Hᶠᶜ, auxiliary.Hᶜᶠ + timestepper = settings.timestepper parameters = auxiliary.kernel_parameters @@ -401,7 +390,7 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu U_args = (grid, Δτᴮ, η, ηᵐ, ηᵐ⁻¹, ηᵐ⁻², U, Uᵐ⁻¹, Uᵐ⁻², V, Vᵐ⁻¹, Vᵐ⁻², - η̅, U̅, V̅, Gᵁ, Gⱽ, g, + η̅, U̅, V̅, Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, g, timestepper) GC.@preserve η_args U_args begin diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index 3e3c4ec842..493a2a2f9e 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -11,13 +11,21 @@ function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) Gᵁ = XFaceField(grid; indices = (:, :, Nz)) Gⱽ = YFaceField(grid; indices = (:, :, Nz)) + Hᶠᶜ = Field((Face, Center, Nothing), grid) + Hᶜᶠ = Field((Center, Face, Nothing), grid) + + @apply_regionally calculate_column_height!(Hᶠᶜ, (Face, Center, Center)) + @apply_regionally calculate_column_height!(Hᶜᶠ, (Center, Face, Center)) + + fill_halo_regions!((Hᶠᶜ, Hᶜᶠ)) + # In a non-parallel grid we calculate only the interior @apply_regionally kernel_size = augmented_kernel_size(grid, grid.partition) @apply_regionally kernel_offsets = augmented_kernel_offsets(grid, grid.partition) @apply_regionally kernel_parameters = KernelParameters(kernel_size, kernel_offsets) - return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, kernel_parameters) + return SplitExplicitAuxiliaryFields(Gᵁ, Gⱽ, Hᶠᶜ, Hᶜᶠ, kernel_parameters) end @inline function calculate_column_height!(height, location) From 7ff259d652d8a385ca250e48e51562c856438de0 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:37:11 -0500 Subject: [PATCH 119/138] test it now --- .../distributed_split_explicit_free_surface.jl | 6 ++---- .../split_explicit_free_surface.jl | 6 ++---- src/MultiRegion/multi_region_split_explicit_free_surface.jl | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl index 66d30f9422..32b23f1256 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/distributed_split_explicit_free_surface.jl @@ -7,10 +7,8 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: FreeSurface, SplitExpli function SplitExplicitAuxiliaryFields(grid::DistributedGrid) - Nz = size(grid, 3) - - Gᵁ = XFaceField(grid, indices = (:, :, Nz)) - Gⱽ = YFaceField(grid, indices = (:, :, Nz)) + Gᵁ = Field((Face, Center, Nothing), grid) + Gⱽ = Field((Center, Face, Nothing), grid) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 252b335517..f98d116ba9 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -194,10 +194,8 @@ Return the `SplitExplicitAuxiliaryFields` for `grid`. """ function SplitExplicitAuxiliaryFields(grid::AbstractGrid) - Nz = size(grid, 3) - - Gᵁ = XFaceField(grid, indices = (:, :, Nz)) - Gⱽ = YFaceField(grid, indices = (:, :, Nz)) + Gᵁ = Field((Face, Center, Nothing), grid) + Gⱽ = Field((Center, Face, Nothing), grid) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) diff --git a/src/MultiRegion/multi_region_split_explicit_free_surface.jl b/src/MultiRegion/multi_region_split_explicit_free_surface.jl index 493a2a2f9e..d33f6bd1a6 100644 --- a/src/MultiRegion/multi_region_split_explicit_free_surface.jl +++ b/src/MultiRegion/multi_region_split_explicit_free_surface.jl @@ -6,10 +6,8 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: FreeSurface, SplitExpli function SplitExplicitAuxiliaryFields(grid::MultiRegionGrids) - Nz = size(grid, 3) - - Gᵁ = XFaceField(grid; indices = (:, :, Nz)) - Gⱽ = YFaceField(grid; indices = (:, :, Nz)) + Gᵁ = Field((Face, Center, Nothing), grid) + Gⱽ = Field((Center, Face, Nothing), grid) Hᶠᶜ = Field((Face, Center, Nothing), grid) Hᶜᶠ = Field((Center, Face, Nothing), grid) From f3fa4482477d99946eebe24c99df50eb44950d49 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:43:29 -0500 Subject: [PATCH 120/138] optimization --- .../CATKEVerticalDiffusivities.jl | 80 +++++++---- .../mixing_length.jl | 125 +++++++++--------- .../turbulent_kinetic_energy_equation.jl | 32 ++--- 3 files changed, 128 insertions(+), 109 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 71d27e6719..ae2d23c5ce 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -226,7 +226,11 @@ function DiffusivityFields(grid, tracer_names, bcs, closure::FlavorOfCATKE) _tupled_tracer_diffusivities = NamedTuple(name => name === :e ? κᵉ : κᶜ for name in tracer_names) _tupled_implicit_linear_coefficients = NamedTuple(name => name === :e ? Lᵉ : ZeroField() for name in tracer_names) - return (; κᵘ, κᶜ, κᵉ, Lᵉ, Qᵇ, previous_compute_time, _tupled_tracer_diffusivities, _tupled_implicit_linear_coefficients) + S² = CenterField(grid) + N² = ZFaceField(grid) + w★ = CenterField(grid) + + return (; κᵘ, κᶜ, κᵉ, Lᵉ, Qᵇ, S², N², w★, previous_compute_time, _tupled_tracer_diffusivities, _tupled_implicit_linear_coefficients) end const c = Center() @@ -250,9 +254,13 @@ function compute_diffusivities!(diffusivities, closure::FlavorOfCATKE, model; pa diffusivities.previous_compute_time[] = model.clock.time launch!(arch, grid, :xy, - compute_average_surface_buoyancy_flux!, + _compute_average_surface_buoyancy_flux!, diffusivities.Qᵇ, grid, closure, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) + launch!(arch, grid, parameters, + _compute_CATKE_auxiliaries!, + diffusivities, grid, closure, velocities, tracers, buoyancy) + launch!(arch, grid, parameters, compute_CATKE_diffusivities!, diffusivities, grid, closure, velocities, tracers, buoyancy) @@ -260,7 +268,22 @@ function compute_diffusivities!(diffusivities, closure::FlavorOfCATKE, model; pa return nothing end -@kernel function compute_average_surface_buoyancy_flux!(Qᵇ, grid, closure, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) +@kernel function _compute_CATKE_auxiliaries!(diffusivities, grid, closure, velocities, tracers, buoyancy) + i, j, k = @index(Global, NTuple) + + S² = diffusivities.S² + N² = diffusivities.N² + w★ = diffusivities.w★ + u, v, w = velocities + + @inbounds begin + S²[i, j, k] = shearᶜᶜᶠ(i, j, k, grid, u, v) + N²[i, j, k] = ∂z_b(i, j, k, grid, buoyancy, tracers) + w★[i, j, k] = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) + end +end + +@kernel function _compute_average_surface_buoyancy_flux!(Qᵇ, grid, closure, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) i, j = @index(Global, NTuple) closure = getclosure(i, j, closure) @@ -279,17 +302,20 @@ end @inbounds Qᵇ[i, j, 1] = (Qᵇᵢⱼ + ϵ * Qᵇ★) / (1 + ϵ) end -@kernel function compute_CATKE_diffusivities!(diffusivities, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy) +@kernel function _compute_CATKE_diffusivities!(diffusivities, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy) i, j, k = @index(Global, NTuple) # Ensure this works with "ensembles" of closures, in addition to ordinary single closures closure_ij = getclosure(i, j, closure) Qᵇ = diffusivities.Qᵇ + S² = diffusivities.S² + N² = diffusivities.N² + w★ = diffusivities.w★ @inbounds begin - κᵘ★ = κuᶜᶜᶠ(i, j, k, grid, closure_ij, velocities, tracers, buoyancy, Qᵇ) - κᶜ★ = κcᶜᶜᶠ(i, j, k, grid, closure_ij, velocities, tracers, buoyancy, Qᵇ) - κᵉ★ = κeᶜᶜᶠ(i, j, k, grid, closure_ij, velocities, tracers, buoyancy, Qᵇ) + κᵘ★ = κuᶜᶜᶠ(i, j, k, grid, closure_ij, Qᵇ, S², N², w★) + κᶜ★ = κcᶜᶜᶠ(i, j, k, grid, closure_ij, Qᵇ, S², N², w★) + κᵉ★ = κeᶜᶜᶠ(i, j, k, grid, closure_ij, Qᵇ, S², N², w★) on_periphery = peripheral_node(i, j, k, grid, c, c, f) within_inactive = inactive_node(i, j, k, grid, c, c, f) @@ -315,7 +341,7 @@ end on_bottom = !inactive_cell(i, j, k, grid) & inactive_cell(i, j, k-1, grid) Δz = Δzᶜᶜᶜ(i, j, k, grid) Cᵂϵ = closure_ij.turbulent_kinetic_energy_equation.Cᵂϵ - Q_e = - Cᵂϵ * turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure_ij, tracers.e) / Δz * on_bottom + Q_e = - Cᵂϵ * w★[i, j, k] / Δz * on_bottom # Implicit TKE dissipation ω_e = dissipation_rate(i, j, k, grid, closure_ij, velocities, tracers, buoyancy, diffusivities) @@ -335,42 +361,40 @@ end return sqrt(max(eᵐⁱⁿ, eᵢ)) end -@inline function κuᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - w★ = ℑzᵃᵃᶠ(i, j, k, grid, turbulent_velocityᶜᶜᶜ, closure, tracers.e) - ℓᵘ = momentum_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - κᵘ = ℓᵘ * w★ +@inline function κuᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + ℓᵘ = momentum_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, S², N², w★) + κᵘ = ℓᵘ * w★ᶜᶜᶠ κᵘ_max = closure.maximum_viscosity return min(κᵘ, κᵘ_max) end -@inline function κuᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - w★ = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) - ℓᵘ = momentum_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - κᵘ = ℓᵘ * w★ +@inline function κuᶜᶜᶜ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + ℓᵘ = momentum_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, S², N², w★) + κᵘ = @inbounds ℓᵘ * w★[i, j, k] κᵘ_max = closure.maximum_viscosity return min(κᵘ, κᵘ_max) end -@inline function κcᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - w★ = ℑzᵃᵃᶠ(i, j, k, grid, turbulent_velocityᶜᶜᶜ, closure, tracers.e) - ℓᶜ = tracer_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - κᶜ = ℓᶜ * w★ +@inline function κcᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + ℓᶜ = tracer_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + κᶜ = ℓᶜ * w★ᶜᶜᶠ κᶜ_max = closure.maximum_tracer_diffusivity return min(κᶜ, κᶜ_max) end -@inline function κcᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - w★ = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) - ℓᶜ = tracer_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - κᶜ = ℓᶜ * w★ +@inline function κcᶜᶜᶜ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + ℓᶜ = tracer_mixing_lengthᶜᶜᶜ(ii, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + κᶜ = @inbounds ℓᶜ * w★[i, j, k] κᶜ_max = closure.maximum_tracer_diffusivity return min(κᶜ, κᶜ_max) end -@inline function κeᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - w★ = ℑzᵃᵃᶠ(i, j, k, grid, turbulent_velocityᶜᶜᶜ, closure, tracers.e) - ℓᵉ = TKE_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) - κᵉ = ℓᵉ * w★ +@inline function κeᶜᶜᶠ(i, j, k, grid, surface_buoyancy_flux, S², N², w★) + w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + ℓᵉ = TKE_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + κᵉ = ℓᵉ * w★ᶜᶜᶠ κᵉ_max = closure.maximum_tke_diffusivity return min(κᵉ, κᵉ_max) end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl index 445d9dd355..c66c85c965 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl @@ -50,23 +50,21 @@ end return S² end -@inline function stratification_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, e, tracers, buoyancy) +@inline function stratification_mixing_lengthᶜᶜᶠ(i, j, k, grid, N², w★) FT = eltype(grid) - N² = ∂z_b(i, j, k, grid, buoyancy, tracers) - N²⁺ = clip(N²) - w★ = ℑzᵃᵃᶠ(i, j, k, grid, turbulent_velocityᶜᶜᶜ, closure, e) - return ifelse(N²⁺ == 0, FT(Inf), w★ / sqrt(N²⁺)) + @inbounds N²⁺ = clip(N²[i, j, k]) + w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + return ifelse(N²⁺ == 0, FT(Inf), w★ᶜᶜᶠ / sqrt(N²⁺)) end -@inline function stratification_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, e, tracers, buoyancy) +@inline function stratification_mixing_lengthᶜᶜᶜ(i, j, k, grid, N², w★) FT = eltype(grid) - N² = ℑzᵃᵃᶜ(i, j, k, grid, ∂z_b, buoyancy, tracers) - N²⁺ = clip(N²) - w★ = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, e) + N²ᶜᶜᶜ = ℑzᵃᵃᶜ(i, j, k, grid, N²) + N²⁺ = clip(N²ᶜᶜᶜ) return ifelse(N²⁺ == 0, FT(Inf), w★ / sqrt(N²⁺)) end -@inline function stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, e, velocities, tracers, buoyancy) +@inline function stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, N², w★) Cˢ = closure.mixing_length.Cˢ Cᵇ = closure.mixing_length.Cᵇ @@ -74,7 +72,7 @@ end d_down = Cᵇ * height_above_bottomᶜᶜᶠ(i, j, k, grid) d = min(d_up, d_down) - ℓᴺ = stratification_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, e, tracers, buoyancy) + ℓᴺ = stratification_mixing_lengthᶜᶜᶠ(i, j, k, grid, N², w★) ℓ = min(d, ℓᴺ) ℓ = ifelse(isnan(ℓ), d, ℓ) @@ -82,14 +80,14 @@ end return ℓ end -@inline function stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, e, velocities, tracers, buoyancy) +@inline function stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, N², w★) Cˢ = closure.mixing_length.Cˢ Cᵇ = closure.mixing_length.Cᵇ d_up = Cˢ * depthᶜᶜᶜ(i, j, k, grid) d_down = Cᵇ * height_above_bottomᶜᶜᶜ(i, j, k, grid) d = min(d_up, d_down) - ℓᴺ = stratification_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, e, tracers, buoyancy) + ℓᴺ = stratification_mixing_lengthᶜᶜᶜ(i, j, k, grid, N², w★) ℓ = min(d, ℓᴺ) ℓ = ifelse(isnan(ℓ), d, ℓ) @@ -97,34 +95,31 @@ end return ℓ end -@inline three_halves_tkeᶜᶜᶜ(i, j, k, grid, closure, e) = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, e)^3 -@inline squared_tkeᶜᶜᶜ(i, j, k, grid, closure, e) = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, e)^2 +@inline three_halves_tkeᶜᶜᶜ(i, j, k, grid, w★) = @inbounds w★[i, j, k]^3 +@inline squared_tkeᶜᶜᶜ(i, j, k, grid, w★) = @inbounds w★[i, j, k]^2 @inline function convective_length_scaleᶜᶜᶠ(i, j, k, grid, closure, Cᶜ::Number, Cᵉ::Number, Cˢᵖ::Number, - velocities, tracers, buoyancy, surface_buoyancy_flux) - - u = velocities.u - v = velocities.v + surface_buoyancy_flux, S², N², w★) Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇ = @inbounds surface_buoyancy_flux[i, j, 1] - w★ = ℑzᵃᵃᶠ(i, j, k, grid, turbulent_velocityᶜᶜᶜ, closure, tracers.e) - w★² = ℑzᵃᵃᶠ(i, j, k, grid, squared_tkeᶜᶜᶜ, closure, tracers.e) - w★³ = ℑzᵃᵃᶠ(i, j, k, grid, three_halves_tkeᶜᶜᶜ, closure, tracers.e) - S² = shearᶜᶜᶠ(i, j, k, grid, u, v) - N² = ∂z_b(i, j, k, grid, buoyancy, tracers) - N²_above = ∂z_b(i, j, k+1, grid, buoyancy, tracers) + w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + w★²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, squared_tkeᶜᶜᶜ, w★) + w★³ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, three_halves_tkeᶜᶜᶜ, w★) + S²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, S²) + N²_local = @inbounds N²[i, j, k] + N²_above = @inbounds N²[i, j, k+1] # "Convective length" # ℓᶜ ∼ boundary layer depth according to Deardorff scaling - ℓᶜ = Cᶜ * w★³ / (Qᵇ + Qᵇᵋ) + ℓᶜ = Cᶜ * w★³ᶜᶜᶠ / (Qᵇ + Qᵇᵋ) ℓᶜ = ifelse(isnan(ℓᶜ), zero(grid), ℓᶜ) # Figure out which mixing length applies - convecting = (Qᵇ > Qᵇᵋ) & (N² < 0) + convecting = (Qᵇ > Qᵇᵋ) & (N²_local < 0) # Model for shear-convection interaction - Sp = sqrt(S²) * w★² / (Qᵇ + Qᵇᵋ) # Sp = "Sheared convection number" + Sp = sqrt(S²ᶜᶜᶠ) * w★²ᶜᶜᶠ / (Qᵇ + Qᵇᵋ) # Sp = "Sheared convection number" ϵˢᵖ = 1 - Cˢᵖ * Sp # ϵ = Sheared convection factor # Reduce convective and entraining mixing lengths by sheared convection factor @@ -133,10 +128,10 @@ end # "Entrainment length" # Ensures that w′b′ ~ Qᵇ at entrainment depth - ℓᵉ = Cᵉ * Qᵇ / (w★ * N² + Qᵇᵋ) + ℓᵉ = Cᵉ * Qᵇ / (w★ᶜᶜᶠ * N²_local + Qᵇᵋ) ℓᵉ = clip(ϵˢᵖ * ℓᵉ) - entraining = (Qᵇ > Qᵇᵋ) & (N² > 0) & (N²_above < 0) + entraining = (Qᵇ > Qᵇᵋ) & (N²_local > 0) & (N²_above < 0) ℓ = ifelse(convecting, ℓᶜ, ifelse(entraining, ℓᵉ, zero(grid))) @@ -145,19 +140,15 @@ end end @inline function convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ::Number, Cᵉ::Number, Cˢᵖ::Number, - velocities, tracers, buoyancy, surface_buoyancy_flux) - - u = velocities.u - v = velocities.v + surface_buoyancy_flux, S², N², w★) Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇ = @inbounds surface_buoyancy_flux[i, j, 1] - w★ = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) - w★² = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e)^2 - w★³ = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e)^3 - S² = shearᶜᶜᶜ(i, j, k, grid, u, v) - N² = ℑzᵃᵃᶜ(i, j, k, grid, ∂z_b, buoyancy, tracers) - N²_above = ℑzᵃᵃᶜ(i, j, k+1, grid, ∂z_b, buoyancy, tracers) + w★² = w★^2 + w★³ = w★^3 + S²ᶜᶜᶜ = @inbounds S²[i, j, k] + N²_local = ℑzᵃᵃᶜ(i, j, k, grid, N²) + N²_above = ℑzᵃᵃᶜ(i, j, k+1, grid, N²) # "Convective length" # ℓᶜ ∼ boundary layer depth according to Deardorff scaling @@ -165,10 +156,10 @@ end ℓᶜ = ifelse(isnan(ℓᶜ), zero(grid), ℓᶜ) # Figure out which mixing length applies - convecting = (Qᵇ > Qᵇᵋ) & (N² < 0) + convecting = (Qᵇ > Qᵇᵋ) & (N²_local < 0) # Model for shear-convection interaction - Sp = sqrt(S²) * w★² / (Qᵇ + Qᵇᵋ) # Sp = "Sheared convection number" + Sp = sqrt(S²ᶜᶜᶜ) * w★² / (Qᵇ + Qᵇᵋ) # Sp = "Sheared convection number" ϵˢᵖ = 1 - Cˢᵖ * Sp # ϵ = Sheared convection factor # Reduce convective and entraining mixing lengths by sheared convection factor @@ -177,10 +168,10 @@ end # "Entrainment length" # Ensures that w′b′ ~ Qᵇ at entrainment depth - ℓᵉ = Cᵉ * Qᵇ / (w★ * N² + Qᵇᵋ) + ℓᵉ = @inbounds Cᵉ * Qᵇ / (w★[i, j, k] * N²_local + Qᵇᵋ) ℓᵉ = clip(ϵˢᵖ * ℓᵉ) - entraining = (Qᵇ > Qᵇᵋ) & (N² > 0) & (N²_above < 0) + entraining = (Qᵇ > Qᵇᵋ) & (N²_local > 0) & (N²_above < 0) ℓ = ifelse(convecting, ℓᶜ, ifelse(entraining, ℓᵉ, zero(grid))) @@ -192,26 +183,28 @@ end @inline step(x, c, w) = max(zero(x), min(one(x), (x - c) / w)) @inline scale(Ri, σ⁻, σ⁺ , c, w) = σ⁻ + (σ⁺ - σ⁻) * step(Ri, c, w) -@inline function stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - Ri = Riᶜᶜᶠ(i, j, k, grid, velocities, tracers, buoyancy) +@inline function stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + S²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, S²) + Ri = ifelse(N² ≤ 0, zero(grid), N² / S²ᶜᶜᶠ) CRi⁰ = closure.mixing_length.CRi⁰ CRiᵟ = closure.mixing_length.CRiᵟ return scale(Ri, Cˡᵒ, Cʰⁱ, CRi⁰, CRiᵟ) end -@inline function stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - Ri = Riᶜᶜᶜ(i, j, k, grid, velocities, tracers, buoyancy) +@inline function stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + N²ᶜᶜᶜ = ℑzᵃᵃᶠ(i, j, k, grid, N²) + Ri = ifelse(N² ≤ 0, zero(grid), N²ᶜᶜᶜ / S²) CRi⁰ = closure.mixing_length.CRi⁰ CRiᵟ = closure.mixing_length.CRiᵟ return scale(Ri, Cˡᵒ, Cʰⁱ, CRi⁰, CRiᵟ) end -@inline function momentum_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) +@inline function momentum_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, S², N², w★) Cˡᵒ = closure.mixing_length.Cˡᵒu Cʰⁱ = closure.mixing_length.Cʰⁱu - σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) + σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) - ℓ★ = σ * stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + ℓ★ = σ * stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, N², w★) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) H = total_depthᶜᶜᵃ(i, j, grid) @@ -219,12 +212,12 @@ end return min(H, ℓ★) end -@inline function momentum_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) +@inline function momentum_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, S², N², w★) Cˡᵒ = closure.mixing_length.Cˡᵒu Cʰⁱ = closure.mixing_length.Cʰⁱu - σ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) + σ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) - ℓ★ = σ * stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + ℓ★ = σ * stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, N², w★) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) H = total_depthᶜᶜᵃ(i, j, grid) @@ -232,16 +225,16 @@ end return min(H, ℓ★) end -@inline function tracer_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) +@inline function tracer_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) Cᶜ = closure.mixing_length.Cᶜc Cᵉ = closure.mixing_length.Cᵉc Cˢᵖ = closure.mixing_length.Cˢᵖ - ℓʰ = convective_length_scaleᶜᶜᶠ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, velocities, tracers, buoyancy, surface_buoyancy_flux) + ℓʰ = convective_length_scaleᶜᶜᶠ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, surface_buoyancy_flux, S², N², w★) Cˡᵒ = closure.mixing_length.Cˡᵒc Cʰⁱ = closure.mixing_length.Cʰⁱc - σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - ℓ★ = σ * stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + ℓ★ = σ * stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, N², w★) ℓʰ = ifelse(isnan(ℓʰ), zero(grid), ℓʰ) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) @@ -251,16 +244,16 @@ end return min(H, ℓᶜ) end -@inline function tracer_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) +@inline function tracer_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) Cᶜ = closure.mixing_length.Cᶜc Cᵉ = closure.mixing_length.Cᵉc Cˢᵖ = closure.mixing_length.Cˢᵖ - ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, velocities, tracers, buoyancy, surface_buoyancy_flux) + ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, surface_buoyancy_flux, S², N², w★) Cˡᵒ = closure.mixing_length.Cˡᵒc Cʰⁱ = closure.mixing_length.Cʰⁱc - σ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - ℓ★ = σ * stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + σ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + ℓ★ = σ * stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, N², w★) ℓʰ = ifelse(isnan(ℓʰ), zero(grid), ℓʰ) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) @@ -271,16 +264,16 @@ end return min(H, ℓᶜ) end -@inline function TKE_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, velocities, tracers, buoyancy, surface_buoyancy_flux) +@inline function TKE_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) Cᶜ = closure.mixing_length.Cᶜe Cᵉ = closure.mixing_length.Cᵉe Cˢᵖ = closure.mixing_length.Cˢᵖ - ℓʰ = convective_length_scaleᶜᶜᶠ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, velocities, tracers, buoyancy, surface_buoyancy_flux) + ℓʰ = convective_length_scaleᶜᶜᶠ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, surface_buoyancy_flux, S², N², w★) Cˡᵒ = closure.mixing_length.Cˡᵒe Cʰⁱ = closure.mixing_length.Cʰⁱe - σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - ℓ★ = σ * stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + σ = stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + ℓ★ = σ * stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, N², w★) ℓʰ = ifelse(isnan(ℓʰ), zero(grid), ℓʰ) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 33215c98e5..7600013551 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -33,8 +33,8 @@ end # Non-conservative reconstruction of shear production: closure = getclosure(i, j, closure) - κᵘ = κuᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities.Qᵇ) - S² = shearᶜᶜᶜ(i, j, k, grid, u, v) + κᵘ = @inbounds diffusivities.κᵘ[i, j, k] + S² = ℑzᵃᵃᶜ(i, j, k, grid, diffusivities.S²) return κᵘ * S² end @@ -54,7 +54,7 @@ end @inline function explicit_buoyancy_flux(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities) closure = getclosure(i, j, closure) κᶜ = κcᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities.Qᵇ) - N² = ℑzᵃᵃᶜ(i, j, k, grid, ∂z_b, buoyancy, tracers) + N² = ℑzᵃᵃᶜ(i, j, k, grid, diffusivities.N²) return - κᶜ * N² end @@ -75,23 +75,22 @@ const VITD = VerticallyImplicitTimeDiscretization return ifelse(dissipative_buoyancy_flux, zero(grid), wb) end -@inline dissipation(i, j, k, grid, closure::FlavorOfCATKE{<:VITD}, args...) = zero(grid) +@inline dissipation(i, j, k, grid, ::FlavorOfCATKE{<:VITD}, args...) = zero(grid) -@inline function dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, - buoyancy, surface_buoyancy_flux) +@inline function dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure::FlavorOfCATKE, surface_buoyancy_flux, S², N², w★) # Convective dissipation length Cᶜ = closure.turbulent_kinetic_energy_equation.CᶜD Cᵉ = closure.turbulent_kinetic_energy_equation.CᵉD Cˢᵖ = closure.mixing_length.Cˢᵖ Qᵇ = surface_buoyancy_flux - ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, velocities, tracers, buoyancy, Qᵇ) + ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, surface_buoyancy_flux, S², N², w★) # "Stable" dissipation length Cˡᵒ = closure.turbulent_kinetic_energy_equation.CˡᵒD Cʰⁱ = closure.turbulent_kinetic_energy_equation.CʰⁱD - σᴰ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, velocities, tracers, buoyancy) - ℓ★ = stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, tracers.e, velocities, tracers, buoyancy) + σᴰ = stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) + ℓ★ = stable_length_scaleᶜᶜᶜ(i, j, k, grid, closure, N², w★) ℓ★ = ℓ★ / σᴰ # Dissipation length @@ -103,12 +102,15 @@ end return min(H, ℓᴰ) end -@inline function dissipation_rate(i, j, k, grid, closure::FlavorOfCATKE, - velocities, tracers, buoyancy, diffusivities) +@inline function dissipation_rate(i, j, k, grid, closure::FlavorOfCATKE, diffusivities) - ℓᴰ = dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities.Qᵇ) + Qᵇ = diffusivities.Qᵇ + S² = diffusivities.S² + N² = diffusivities.N² + w★ = diffusivities.w★ + + ℓᴰ = dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Qᵇ, S², N², w★) e = tracers.e - FT = eltype(grid) eᵢ = @inbounds e[i, j, k] # Note: @@ -126,9 +128,9 @@ end end # Fallbacks for explicit time discretization -@inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, args...) +@inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) eᵢ = @inbounds tracers.e[i, j, k] - ω = dissipation_rate(i, j, k, grid, closure, velocities, tracers, args...) + ω = dissipation_rate(i, j, k, grid, closure, diffusivities) return ω * eᵢ end From 0f946dfc203f85088f13aac5c12fb28372c8e77d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 24 Jan 2024 14:02:02 -0500 Subject: [PATCH 121/138] bugfixes --- .../CATKEVerticalDiffusivities.jl | 22 +++++++++++-------- .../mixing_length.jl | 16 ++++++++------ .../turbulent_kinetic_energy_equation.jl | 8 +++---- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index ae2d23c5ce..f2eb49593c 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -253,16 +253,16 @@ function compute_diffusivities!(diffusivities, closure::FlavorOfCATKE, model; pa Δt = model.clock.time - diffusivities.previous_compute_time[] diffusivities.previous_compute_time[] = model.clock.time - launch!(arch, grid, :xy, - _compute_average_surface_buoyancy_flux!, - diffusivities.Qᵇ, grid, closure, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) - launch!(arch, grid, parameters, _compute_CATKE_auxiliaries!, diffusivities, grid, closure, velocities, tracers, buoyancy) + launch!(arch, grid, :xy, + _compute_average_surface_buoyancy_flux!, + diffusivities.Qᵇ, grid, closure, diffusivities, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) + launch!(arch, grid, parameters, - compute_CATKE_diffusivities!, + _compute_CATKE_diffusivities!, diffusivities, grid, closure, velocities, tracers, buoyancy) return nothing @@ -283,15 +283,19 @@ end end end -@kernel function _compute_average_surface_buoyancy_flux!(Qᵇ, grid, closure, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) +@kernel function _compute_average_surface_buoyancy_flux!(Qᵇ, grid, closure, diffusivities, velocities, tracers, buoyancy, top_tracer_bcs, clock, Δt) i, j = @index(Global, NTuple) + S² = diffusivities.S² + N² = diffusivities.N² + w★ = diffusivities.w★ + closure = getclosure(i, j, closure) Qᵇ★ = top_buoyancy_flux(i, j, grid, buoyancy, top_tracer_bcs, clock, merge(velocities, tracers)) k = grid.Nz - ℓᴰ = dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, Qᵇ) + ℓᴰ = dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Qᵇ, S², N², w★) Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇᵢⱼ = @inbounds Qᵇ[i, j, 1] @@ -385,13 +389,13 @@ end end @inline function κcᶜᶜᶜ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) - ℓᶜ = tracer_mixing_lengthᶜᶜᶜ(ii, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) + ℓᶜ = tracer_mixing_lengthᶜᶜᶜ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) κᶜ = @inbounds ℓᶜ * w★[i, j, k] κᶜ_max = closure.maximum_tracer_diffusivity return min(κᶜ, κᶜ_max) end -@inline function κeᶜᶜᶠ(i, j, k, grid, surface_buoyancy_flux, S², N², w★) +@inline function κeᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) ℓᵉ = TKE_mixing_lengthᶜᶜᶠ(i, j, k, grid, closure, surface_buoyancy_flux, S², N², w★) κᵉ = ℓᵉ * w★ᶜᶜᶠ diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl index c66c85c965..342dd4975c 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl @@ -106,7 +106,7 @@ end w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) w★²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, squared_tkeᶜᶜᶜ, w★) w★³ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, three_halves_tkeᶜᶜᶜ, w★) - S²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, S²) + S²ᶜᶜᶠ = @inbounds S²[i, j, k] N²_local = @inbounds N²[i, j, k] N²_above = @inbounds N²[i, j, k+1] @@ -144,9 +144,9 @@ end Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇ = @inbounds surface_buoyancy_flux[i, j, 1] - w★² = w★^2 - w★³ = w★^3 - S²ᶜᶜᶜ = @inbounds S²[i, j, k] + w★² = @inbounds w★[i, j, k]^2 + w★³ = @inbounds w★[i, j, k]^3 + S²ᶜᶜᶜ = ℑzᵃᵃᶜ(i, j, k, grid, S²) N²_local = ℑzᵃᵃᶜ(i, j, k, grid, N²) N²_above = ℑzᵃᵃᶜ(i, j, k+1, grid, N²) @@ -184,8 +184,9 @@ end @inline scale(Ri, σ⁻, σ⁺ , c, w) = σ⁻ + (σ⁺ - σ⁻) * step(Ri, c, w) @inline function stability_functionᶜᶜᶠ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) - S²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, S²) - Ri = ifelse(N² ≤ 0, zero(grid), N² / S²ᶜᶜᶠ) + N²ᶜᶜᶠ = @inbounds N²[i, j, k] + S²ᶜᶜᶠ = @inbounds S²[i, j, k] + Ri = ifelse(N²ᶜᶜᶠ ≤ 0, zero(grid), N²ᶜᶜᶠ / S²ᶜᶜᶠ) CRi⁰ = closure.mixing_length.CRi⁰ CRiᵟ = closure.mixing_length.CRiᵟ return scale(Ri, Cˡᵒ, Cʰⁱ, CRi⁰, CRiᵟ) @@ -193,7 +194,8 @@ end @inline function stability_functionᶜᶜᶜ(i, j, k, grid, closure, Cˡᵒ, Cʰⁱ, S², N²) N²ᶜᶜᶜ = ℑzᵃᵃᶠ(i, j, k, grid, N²) - Ri = ifelse(N² ≤ 0, zero(grid), N²ᶜᶜᶜ / S²) + S²ᶜᶜᶜ = ℑzᵃᵃᶠ(i, j, k, grid, S²) + Ri = ifelse(N²ᶜᶜᶜ ≤ 0, zero(grid), N²ᶜᶜᶜ / S²ᶜᶜᶜ) CRi⁰ = closure.mixing_length.CRi⁰ CRiᵟ = closure.mixing_length.CRiᵟ return scale(Ri, Cˡᵒ, Cʰⁱ, CRi⁰, CRiᵟ) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 7600013551..03aad6314b 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -51,20 +51,20 @@ end # ℑzᵃᵃᶜ(i, j, k, grid, buoyancy_fluxᶜᶜᶠ, tracers, buoyancy, diffusivities) # Non-conservative reconstruction of buoyancy flux: -@inline function explicit_buoyancy_flux(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities) +@inline function explicit_buoyancy_flux(i, j, k, grid, closure, diffusivities) closure = getclosure(i, j, closure) - κᶜ = κcᶜᶜᶜ(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities.Qᵇ) + κᶜ = ℑzᵃᵃᶜ(i, j, k, grid, diffusivities.κᶜ) N² = ℑzᵃᵃᶜ(i, j, k, grid, diffusivities.N²) return - κᶜ * N² end @inline buoyancy_flux(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) = - explicit_buoyancy_flux(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities) + explicit_buoyancy_flux(i, j, k, grid, closure, diffusivities) const VITD = VerticallyImplicitTimeDiscretization @inline function buoyancy_flux(i, j, k, grid, closure::FlavorOfCATKE{<:VITD}, velocities, tracers, buoyancy, diffusivities) - wb = explicit_buoyancy_flux(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities) + wb = explicit_buoyancy_flux(i, j, k, grid, closure, diffusivities) eⁱʲᵏ = @inbounds tracers.e[i, j, k] dissipative_buoyancy_flux = sign(wb) * sign(eⁱʲᵏ) < 0 From 27f1a280f9e4d6fe9b9034ab0fa7eee33db379c5 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Wed, 24 Jan 2024 14:18:07 -0500 Subject: [PATCH 122/138] bugfix --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 4 ++-- .../turbulent_kinetic_energy_equation.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index f2eb49593c..dc5fe84437 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -334,7 +334,7 @@ end # "Patankar trick" for buoyancy production (cf Patankar 1980 or Burchard et al. 2003) # If buoyancy flux is a _sink_ of TKE, we treat it implicitly. - wb = explicit_buoyancy_flux(i, j, k, grid, closure, velocities, tracers, buoyancy, diffusivities) + wb = explicit_buoyancy_flux(i, j, k, grid, closure, diffusivities) eⁱʲᵏ = @inbounds tracers.e[i, j, k] # See `buoyancy_flux` @@ -348,7 +348,7 @@ end Q_e = - Cᵂϵ * w★[i, j, k] / Δz * on_bottom # Implicit TKE dissipation - ω_e = dissipation_rate(i, j, k, grid, closure_ij, velocities, tracers, buoyancy, diffusivities) + ω_e = dissipation_rate(i, j, k, grid, closure_ij, tracers, diffusivities) diffusivities.Lᵉ[i, j, k] = - wb_e - ω_e + Q_e end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 03aad6314b..ae7c05471b 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -102,7 +102,7 @@ end return min(H, ℓᴰ) end -@inline function dissipation_rate(i, j, k, grid, closure::FlavorOfCATKE, diffusivities) +@inline function dissipation_rate(i, j, k, grid, closure::FlavorOfCATKE, tracers, diffusivities) Qᵇ = diffusivities.Qᵇ S² = diffusivities.S² @@ -130,7 +130,7 @@ end # Fallbacks for explicit time discretization @inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) eᵢ = @inbounds tracers.e[i, j, k] - ω = dissipation_rate(i, j, k, grid, closure, diffusivities) + ω = dissipation_rate(i, j, k, grid, closure, tracers, diffusivities) return ω * eᵢ end From 0c18521a6a4c5d5dbae43c77f0c48c33f6c68642 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Wed, 24 Jan 2024 16:47:37 -0500 Subject: [PATCH 123/138] bugfixxes --- .../CATKEVerticalDiffusivities.jl | 2 +- .../CATKEVerticalDiffusivities/mixing_length.jl | 10 ++++++---- .../turbulent_kinetic_energy_equation.jl | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index dc5fe84437..3adc62bc01 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -274,7 +274,7 @@ end S² = diffusivities.S² N² = diffusivities.N² w★ = diffusivities.w★ - u, v, w = velocities + u, v, _ = velocities @inbounds begin S²[i, j, k] = shearᶜᶜᶠ(i, j, k, grid, u, v) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl index 342dd4975c..a3dfe0585c 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/mixing_length.jl @@ -61,7 +61,8 @@ end FT = eltype(grid) N²ᶜᶜᶜ = ℑzᵃᵃᶜ(i, j, k, grid, N²) N²⁺ = clip(N²ᶜᶜᶜ) - return ifelse(N²⁺ == 0, FT(Inf), w★ / sqrt(N²⁺)) + w★ᶜᶜᶜ = @inbounds w★[i, j, k] + return ifelse(N²⁺ == 0, FT(Inf), w★ᶜᶜᶜ / sqrt(N²⁺)) end @inline function stable_length_scaleᶜᶜᶠ(i, j, k, grid, closure, N², w★) @@ -103,7 +104,7 @@ end Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇ = @inbounds surface_buoyancy_flux[i, j, 1] - w★ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) + w★¹ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, w★) w★²ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, squared_tkeᶜᶜᶜ, w★) w★³ᶜᶜᶠ = ℑzᵃᵃᶠ(i, j, k, grid, three_halves_tkeᶜᶜᶜ, w★) S²ᶜᶜᶠ = @inbounds S²[i, j, k] @@ -128,7 +129,7 @@ end # "Entrainment length" # Ensures that w′b′ ~ Qᵇ at entrainment depth - ℓᵉ = Cᵉ * Qᵇ / (w★ᶜᶜᶠ * N²_local + Qᵇᵋ) + ℓᵉ = Cᵉ * Qᵇ / (w★¹ᶜᶜᶠ * N²_local + Qᵇᵋ) ℓᵉ = clip(ϵˢᵖ * ℓᵉ) entraining = (Qᵇ > Qᵇᵋ) & (N²_local > 0) & (N²_above < 0) @@ -144,6 +145,7 @@ end Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇ = @inbounds surface_buoyancy_flux[i, j, 1] + w★¹ = @inbounds w★[i, j, k] w★² = @inbounds w★[i, j, k]^2 w★³ = @inbounds w★[i, j, k]^3 S²ᶜᶜᶜ = ℑzᵃᵃᶜ(i, j, k, grid, S²) @@ -168,7 +170,7 @@ end # "Entrainment length" # Ensures that w′b′ ~ Qᵇ at entrainment depth - ℓᵉ = @inbounds Cᵉ * Qᵇ / (w★[i, j, k] * N²_local + Qᵇᵋ) + ℓᵉ = Cᵉ * Qᵇ / (w★¹ * N²_local + Qᵇᵋ) ℓᵉ = clip(ϵˢᵖ * ℓᵉ) entraining = (Qᵇ > Qᵇᵋ) & (N²_local > 0) & (N²_above < 0) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index ae7c05471b..f5a74cdbf3 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -84,7 +84,7 @@ end Cᵉ = closure.turbulent_kinetic_energy_equation.CᵉD Cˢᵖ = closure.mixing_length.Cˢᵖ Qᵇ = surface_buoyancy_flux - ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, surface_buoyancy_flux, S², N², w★) + ℓʰ = convective_length_scaleᶜᶜᶜ(i, j, k, grid, closure, Cᶜ, Cᵉ, Cˢᵖ, Qᵇ, S², N², w★) # "Stable" dissipation length Cˡᵒ = closure.turbulent_kinetic_energy_equation.CˡᵒD From e91a0c9bf78a8cbe1cdd70b94c04fa87e975a8b6 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 26 Jan 2024 08:30:02 -0500 Subject: [PATCH 124/138] adapt --- src/Advection/tracer_advection_operators.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index 82851cd598..28cde2f837 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -24,6 +24,11 @@ function TracerAdvection(; x, y, z) return TracerAdvection{max(Nx, Ny, Nz), FT}(x, y, z) end +Adapt.adapt_structure(to, scheme::TracerAdvection{N, FT}) where {N, FT} = + TracerAdvection{N, FT}(Adapt.adapt(to, scheme.x), + Adapt.adapt(to, scheme.y), + Adapt.adapt(to, scheme.z)) + @inline _advective_tracer_flux_x(args...) = advective_tracer_flux_x(args...) @inline _advective_tracer_flux_y(args...) = advective_tracer_flux_y(args...) @inline _advective_tracer_flux_z(args...) = advective_tracer_flux_z(args...) From 5fc071c294ed3c3cdff3b6eceaf32f0ab005baea Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:10:29 -0500 Subject: [PATCH 125/138] clipping to zero --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 3adc62bc01..7aeb445e12 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -271,6 +271,7 @@ end @kernel function _compute_CATKE_auxiliaries!(diffusivities, grid, closure, velocities, tracers, buoyancy) i, j, k = @index(Global, NTuple) + e = tracers.e S² = diffusivities.S² N² = diffusivities.N² w★ = diffusivities.w★ @@ -280,6 +281,7 @@ end S²[i, j, k] = shearᶜᶜᶠ(i, j, k, grid, u, v) N²[i, j, k] = ∂z_b(i, j, k, grid, buoyancy, tracers) w★[i, j, k] = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) + e[i, j, k] = max(zero(grid), e[i ,j, k]) end end From 3ef798474e2f1e217c05dcbd806638f2b9fe8e8a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:17:15 -0500 Subject: [PATCH 126/138] shear is at faces in z --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 7aeb445e12..b5cec212d9 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -226,7 +226,7 @@ function DiffusivityFields(grid, tracer_names, bcs, closure::FlavorOfCATKE) _tupled_tracer_diffusivities = NamedTuple(name => name === :e ? κᵉ : κᶜ for name in tracer_names) _tupled_implicit_linear_coefficients = NamedTuple(name => name === :e ? Lᵉ : ZeroField() for name in tracer_names) - S² = CenterField(grid) + S² = ZFaceField(grid) N² = ZFaceField(grid) w★ = CenterField(grid) From b7efdd5ef067b55f4ca46ca59ec316bacaaac532 Mon Sep 17 00:00:00 2001 From: "Navid C. Constantinou" Date: Fri, 2 Feb 2024 18:43:33 +0200 Subject: [PATCH 127/138] code alignment --- ext/OceananigansEnzymeCoreExt.jl | 98 ++++++++++++++++---------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/ext/OceananigansEnzymeCoreExt.jl b/ext/OceananigansEnzymeCoreExt.jl index c43033a0e3..f52b53276d 100644 --- a/ext/OceananigansEnzymeCoreExt.jl +++ b/ext/OceananigansEnzymeCoreExt.jl @@ -9,7 +9,7 @@ EnzymeCore.EnzymeRules.inactive_noinl(::typeof(Oceananigans.Utils.flatten_reduce EnzymeCore.EnzymeRules.inactive(::typeof(Oceananigans.Grids.total_size), x...) = nothing @inline batch(::Val{1}, ::Type{T}) where T = T -@inline batch(::Val{N}, ::Type{T}) where {T,N} = NTuple{N,T} +@inline batch(::Val{N}, ::Type{T}) where {N, T} = NTuple{N, T} function EnzymeCore.EnzymeRules.augmented_primal(config, func::EnzymeCore.Const{Type{Field}}, @@ -18,31 +18,32 @@ function EnzymeCore.EnzymeRules.augmented_primal(config, EnzymeCore.Duplicated{<:Tuple}}, grid::EnzymeCore.Const{<:Oceananigans.Grids.AbstractGrid}, T::EnzymeCore.Const{<:DataType}; kw...) where RT - primal = if EnzymeCore.EnzymeRules.needs_primal(config) - func.val(loc.val, grid.val, T.val; kw...) - else - nothing - end - - if haskey(kw, :a) - # copy zeroing - kw[:data] = copy(kw[:data]) - end - - shadow = if EnzymeCore.EnzymeRules.width(config) == 1 - func.val(loc.val, grid.val, T.val; kw...) - else - ntuple(Val(EnzymeCore.EnzymeRules.width(config))) do i - Base.@_inline_meta - func.val(loc.val, grid.val, T.val; kw...) - end - end - - return EnzymeCore.EnzymeRules.AugmentedReturn{EnzymeCore.EnzymeRules.needs_primal(config) ? RT : Nothing, batch(Val(EnzymeCore.EnzymeRules.width(config)), RT), Nothing}(primal, shadow, nothing) + + primal = if EnzymeCore.EnzymeRules.needs_primal(config) + func.val(loc.val, grid.val, T.val; kw...) + else + nothing + end + + if haskey(kw, :a) + # copy zeroing + kw[:data] = copy(kw[:data]) + end + + shadow = if EnzymeCore.EnzymeRules.width(config) == 1 + func.val(loc.val, grid.val, T.val; kw...) + else + ntuple(Val(EnzymeCore.EnzymeRules.width(config))) do i + Base.@_inline_meta + func.val(loc.val, grid.val, T.val; kw...) + end + end + + return EnzymeCore.EnzymeRules.AugmentedReturn{EnzymeCore.EnzymeRules.needs_primal(config) ? RT : Nothing, batch(Val(EnzymeCore.EnzymeRules.width(config)), RT), Nothing}(primal, shadow, nothing) end function EnzymeCore.EnzymeRules.reverse(config::EnzymeCore.EnzymeRules.ConfigWidth{1}, func::EnzymeCore.Const{Type{Field}}, ::RT, tape, loc::Union{EnzymeCore.Const{<:Tuple}, EnzymeCore.Duplicated{<:Tuple}}, grid::EnzymeCore.Const{<:Oceananigans.Grids.AbstractGrid}, T::EnzymeCore.Const{<:DataType}; kw...) where RT - return (nothing, nothing, nothing) + return (nothing, nothing, nothing) end @@ -69,33 +70,32 @@ function EnzymeCore.EnzymeRules.augmented_primal(config, offset = Oceananigans.Utils.offsets(workspec.val) if !isnothing(only_active_cells) - workgroup, worksize = Oceananigans.Utils.active_cells_work_layout(workgroup, worksize, only_active_cells, grid.val) + workgroup, worksize = Oceananigans.Utils.active_cells_work_layout(workgroup, worksize, only_active_cells, grid.val) offset = nothing end if worksize != 0 - # We can only launch offset kernels with Static sizes!!!! - - if isnothing(offset) - loop! = kernel!.val(Oceananigans.Architectures.device(arch.val), workgroup, worksize) - dloop! = (typeof(kernel!) <: EnzymeCore.Const) ? nothing : kernel!.dval(Oceananigans.Architectures.device(arch.val), workgroup, worksize) - else - loop! = kernel!.val(Oceananigans.Architectures.device(arch.val), KernelAbstractions.StaticSize(workgroup), Oceananigans.Utils.OffsetStaticSize(contiguousrange(worksize, offset))) - dloop! = (typeof(kernel!) <: EnzymeCore.Const) ? nothing : kernel!.val(Oceananigans.Architectures.device(arch.val), KernelAbstractions.StaticSize(workgroup), Oceananigans.Utils.OffsetStaticSize(contiguousrange(worksize, offset))) - end + # We can only launch offset kernels with Static sizes!!!! - @debug "Launching kernel $kernel! with worksize $worksize and offsets $offset from $workspec.val" + if isnothing(offset) + loop! = kernel!.val(Oceananigans.Architectures.device(arch.val), workgroup, worksize) + dloop! = (typeof(kernel!) <: EnzymeCore.Const) ? nothing : kernel!.dval(Oceananigans.Architectures.device(arch.val), workgroup, worksize) + else + loop! = kernel!.val(Oceananigans.Architectures.device(arch.val), KernelAbstractions.StaticSize(workgroup), Oceananigans.Utils.OffsetStaticSize(contiguousrange(worksize, offset))) + dloop! = (typeof(kernel!) <: EnzymeCore.Const) ? nothing : kernel!.val(Oceananigans.Architectures.device(arch.val), KernelAbstractions.StaticSize(workgroup), Oceananigans.Utils.OffsetStaticSize(contiguousrange(worksize, offset))) + end + @debug "Launching kernel $kernel! with worksize $worksize and offsets $offset from $workspec.val" - duploop = (typeof(kernel!) <: EnzymeCore.Const) ? EnzymeCore.Const(loop!) : EnzymeCore.Duplicated(loop!, dloop!) + duploop = (typeof(kernel!) <: EnzymeCore.Const) ? EnzymeCore.Const(loop!) : EnzymeCore.Duplicated(loop!, dloop!) - config2 = EnzymeCore.EnzymeRules.Config{#=needsprimal=#false, #=needsshadow=#false, #=width=#EnzymeCore.EnzymeRules.width(config), EnzymeCore.EnzymeRules.overwritten(config)[5:end]}() - subtape = EnzymeCore.EnzymeRules.augmented_primal(config2, duploop, EnzymeCore.Const{Nothing}, kernel_args...).tape + config2 = EnzymeCore.EnzymeRules.Config{#=needsprimal=#false, #=needsshadow=#false, #=width=#EnzymeCore.EnzymeRules.width(config), EnzymeCore.EnzymeRules.overwritten(config)[5:end]}() + subtape = EnzymeCore.EnzymeRules.augmented_primal(config2, duploop, EnzymeCore.Const{Nothing}, kernel_args...).tape - tape = (duploop, subtape) + tape = (duploop, subtape) else - tape = nothing + tape = nothing end return EnzymeCore.EnzymeRules.AugmentedReturn{Nothing, Nothing, Any}(nothing, nothing, tape) @@ -117,20 +117,20 @@ function EnzymeCore.EnzymeRules.reverse(config::EnzymeCore.EnzymeRules.ConfigWid kwargs...) subrets = if tape !== nothing - duploop, subtape = tape + duploop, subtape = tape - config2 = EnzymeCore.EnzymeRules.Config{#=needsprimal=#false, #=needsshadow=#false, #=width=#EnzymeCore.EnzymeRules.width(config), EnzymeCore.EnzymeRules.overwritten(config)[5:end]}() + config2 = EnzymeCore.EnzymeRules.Config{#=needsprimal=#false, #=needsshadow=#false, #=width=#EnzymeCore.EnzymeRules.width(config), EnzymeCore.EnzymeRules.overwritten(config)[5:end]}() - EnzymeCore.EnzymeRules.reverse(config2, duploop, EnzymeCore.Const{Nothing}, subtape, kernel_args...) - else - ntuple(Val(length(kernel_args))) do _ - Base.@_inline_meta - nothing - end - end + EnzymeCore.EnzymeRules.reverse(config2, duploop, EnzymeCore.Const{Nothing}, subtape, kernel_args...) + else + ntuple(Val(length(kernel_args))) do _ + Base.@_inline_meta + nothing + end + end return (nothing, nothing, nothing, nothing, subrets...) end -end +end # module From 882706fbd0a9c088971ca04752a9ddc8e32a5b15 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 2 Feb 2024 12:56:51 -0500 Subject: [PATCH 128/138] add capitalization --- src/Advection/tracer_advection_operators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index 28cde2f837..bacb9d79e2 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -12,7 +12,7 @@ end """ function TracerAdvection(; x, y, z) -builds a `TracerAdvection` type with different reconstructions in `x`, `y`, and `z` +Builds a `TracerAdvection` type with reconstructions schemes in `x`, `y`, and `z` """ function TracerAdvection(; x, y, z) Nx = required_halo_size(x) From e27728fb74c6bafe431566c93aab39a69d8d30eb Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Fri, 2 Feb 2024 13:10:06 -0500 Subject: [PATCH 129/138] Update src/Advection/tracer_advection_operators.jl Co-authored-by: Gregory L. Wagner --- src/Advection/tracer_advection_operators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Advection/tracer_advection_operators.jl b/src/Advection/tracer_advection_operators.jl index bacb9d79e2..d6bba5a781 100644 --- a/src/Advection/tracer_advection_operators.jl +++ b/src/Advection/tracer_advection_operators.jl @@ -12,7 +12,7 @@ end """ function TracerAdvection(; x, y, z) -Builds a `TracerAdvection` type with reconstructions schemes in `x`, `y`, and `z` +Builds a `TracerAdvection` type with reconstruction schemes in `x`, `y`, and `z`. """ function TracerAdvection(; x, y, z) Nx = required_halo_size(x) From dacd6236b94dc37286e2fcf36b882aa9b42bb5ae Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 8 Feb 2024 11:46:39 +0100 Subject: [PATCH 130/138] adding a minimum dissipation length scale --- .../CATKEVerticalDiffusivities.jl | 6 ++++++ .../turbulent_kinetic_energy_equation.jl | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index b5cec212d9..623526000e 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -53,6 +53,7 @@ struct CATKEVerticalDiffusivity{TD, CL, FT, TKE} <: AbstractScalarDiffusivity{TD maximum_viscosity :: FT minimum_turbulent_kinetic_energy :: FT minimum_convective_buoyancy_flux :: FT + minimum_dissipation_length_scale :: FT negative_turbulent_kinetic_energy_damping_time_scale :: FT end @@ -63,6 +64,7 @@ CATKEVerticalDiffusivity{TD}(mixing_length::CL, maximum_viscosity::FT, minimum_turbulent_kinetic_energy::FT, minimum_convective_buoyancy_flux::FT, + minimum_dissipation_length_scale::FT, negative_turbulent_kinetic_energy_damping_time_scale::FT) where {TD, CL, TKE, FT} = CATKEVerticalDiffusivity{TD, CL, FT, TKE}(mixing_length, turbulent_kinetic_energy_equation, @@ -71,6 +73,7 @@ CATKEVerticalDiffusivity{TD}(mixing_length::CL, maximum_viscosity, minimum_turbulent_kinetic_energy, minimum_convective_buoyancy_flux, + minimum_dissipation_length_scale, negative_turbulent_kinetic_energy_damping_time_scale) CATKEVerticalDiffusivity(FT::DataType; kw...) = @@ -93,6 +96,7 @@ include("turbulent_kinetic_energy_equation.jl") maximum_viscosity = Inf, minimum_turbulent_kinetic_energy = 1e-6, minimum_convective_buoyancy_flux = 1e-8, + minimum_dissipation_length_scale = 1e-2, negative_turbulent_kinetic_energy_damping_time_scale = 1minute) Return the `CATKEVerticalDiffusivity` turbulence closure for vertical mixing by @@ -146,6 +150,7 @@ function CATKEVerticalDiffusivity(time_discretization::TD = VerticallyImplicitTi maximum_viscosity = Inf, minimum_turbulent_kinetic_energy = 1e-6, minimum_convective_buoyancy_flux = 1e-8, + minimum_dissipation_length_scale = 1e-2, negative_turbulent_kinetic_energy_damping_time_scale = 1minute) where TD mixing_length = convert_eltype(FT, mixing_length) @@ -158,6 +163,7 @@ function CATKEVerticalDiffusivity(time_discretization::TD = VerticallyImplicitTi FT(maximum_viscosity), FT(minimum_turbulent_kinetic_energy), FT(minimum_convective_buoyancy_flux), + FT(minimum_dissipation_length_scale), FT(negative_turbulent_kinetic_energy_damping_time_scale)) end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index f5a74cdbf3..4bc2e09460 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -80,6 +80,7 @@ end @inline function dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure::FlavorOfCATKE, surface_buoyancy_flux, S², N², w★) # Convective dissipation length + ℓ⁻ = closure.minimum_dissipation_length_scale Cᶜ = closure.turbulent_kinetic_energy_equation.CᶜD Cᵉ = closure.turbulent_kinetic_energy_equation.CᵉD Cˢᵖ = closure.mixing_length.Cˢᵖ @@ -97,6 +98,7 @@ end ℓʰ = ifelse(isnan(ℓʰ), zero(grid), ℓʰ) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) ℓᴰ = max(ℓ★, ℓʰ) + ℓᴰ = max(ℓ⁻, ℓᴰ) H = total_depthᶜᶜᵃ(i, j, grid) return min(H, ℓᴰ) From f31615724e13adc5fcc7940e27293487d3c5fd19 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 8 Feb 2024 11:48:35 +0100 Subject: [PATCH 131/138] remove zero clipping --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 623526000e..99ad9e41ce 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -287,7 +287,6 @@ end S²[i, j, k] = shearᶜᶜᶠ(i, j, k, grid, u, v) N²[i, j, k] = ∂z_b(i, j, k, grid, buoyancy, tracers) w★[i, j, k] = turbulent_velocityᶜᶜᶜ(i, j, k, grid, closure, tracers.e) - e[i, j, k] = max(zero(grid), e[i ,j, k]) end end From a394670876514101f5b7df74be283d5a083731a8 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:47:05 +0100 Subject: [PATCH 132/138] =?UTF-8?q?conditional=20for=20`=CF=B5=20=3D=3D=20?= =?UTF-8?q?Inf`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CATKEVerticalDiffusivities.jl | 9 ++++----- .../turbulent_kinetic_energy_equation.jl | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 99ad9e41ce..004cb61d4c 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -53,7 +53,6 @@ struct CATKEVerticalDiffusivity{TD, CL, FT, TKE} <: AbstractScalarDiffusivity{TD maximum_viscosity :: FT minimum_turbulent_kinetic_energy :: FT minimum_convective_buoyancy_flux :: FT - minimum_dissipation_length_scale :: FT negative_turbulent_kinetic_energy_damping_time_scale :: FT end @@ -64,7 +63,6 @@ CATKEVerticalDiffusivity{TD}(mixing_length::CL, maximum_viscosity::FT, minimum_turbulent_kinetic_energy::FT, minimum_convective_buoyancy_flux::FT, - minimum_dissipation_length_scale::FT, negative_turbulent_kinetic_energy_damping_time_scale::FT) where {TD, CL, TKE, FT} = CATKEVerticalDiffusivity{TD, CL, FT, TKE}(mixing_length, turbulent_kinetic_energy_equation, @@ -73,7 +71,6 @@ CATKEVerticalDiffusivity{TD}(mixing_length::CL, maximum_viscosity, minimum_turbulent_kinetic_energy, minimum_convective_buoyancy_flux, - minimum_dissipation_length_scale, negative_turbulent_kinetic_energy_damping_time_scale) CATKEVerticalDiffusivity(FT::DataType; kw...) = @@ -307,10 +304,12 @@ end Qᵇᵋ = closure.minimum_convective_buoyancy_flux Qᵇᵢⱼ = @inbounds Qᵇ[i, j, 1] Qᵇ⁺ = max(Qᵇᵋ, Qᵇᵢⱼ, Qᵇ★) # selects fastest (dominant) time-scale - t★ = (ℓᴰ^2 / Qᵇ⁺)^(1/3) + t★ = (ℓᴰ^2 / Qᵇ⁺)^(1/3) ϵ = Δt / t★ - @inbounds Qᵇ[i, j, 1] = (Qᵇᵢⱼ + ϵ * Qᵇ★) / (1 + ϵ) + Qᵇ⁺ᵢⱼ = (Qᵇᵢⱼ + ϵ * Qᵇ★) / (1 + ϵ) + + @inbounds Qᵇ[i, j, 1] = ifelse(ϵ == Inf, Qᵇ★, Qᵇ⁺ᵢⱼ) # avoid problems when `ϵ == Inf` that leads to `NaN` end @kernel function _compute_CATKE_diffusivities!(diffusivities, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 4bc2e09460..f5a74cdbf3 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -80,7 +80,6 @@ end @inline function dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure::FlavorOfCATKE, surface_buoyancy_flux, S², N², w★) # Convective dissipation length - ℓ⁻ = closure.minimum_dissipation_length_scale Cᶜ = closure.turbulent_kinetic_energy_equation.CᶜD Cᵉ = closure.turbulent_kinetic_energy_equation.CᵉD Cˢᵖ = closure.mixing_length.Cˢᵖ @@ -98,7 +97,6 @@ end ℓʰ = ifelse(isnan(ℓʰ), zero(grid), ℓʰ) ℓ★ = ifelse(isnan(ℓ★), zero(grid), ℓ★) ℓᴰ = max(ℓ★, ℓʰ) - ℓᴰ = max(ℓ⁻, ℓᴰ) H = total_depthᶜᶜᵃ(i, j, grid) return min(H, ℓᴰ) From ac08ab5d2582ff871a65c86d23bb0108c1297580 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 9 Feb 2024 10:32:28 +0100 Subject: [PATCH 133/138] bugfix --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 004cb61d4c..23eb938e2d 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -147,7 +147,6 @@ function CATKEVerticalDiffusivity(time_discretization::TD = VerticallyImplicitTi maximum_viscosity = Inf, minimum_turbulent_kinetic_energy = 1e-6, minimum_convective_buoyancy_flux = 1e-8, - minimum_dissipation_length_scale = 1e-2, negative_turbulent_kinetic_energy_damping_time_scale = 1minute) where TD mixing_length = convert_eltype(FT, mixing_length) @@ -160,7 +159,6 @@ function CATKEVerticalDiffusivity(time_discretization::TD = VerticallyImplicitTi FT(maximum_viscosity), FT(minimum_turbulent_kinetic_energy), FT(minimum_convective_buoyancy_flux), - FT(minimum_dissipation_length_scale), FT(negative_turbulent_kinetic_energy_damping_time_scale)) end From b13c72853acadd27522a4e9b462a90a06e1115c8 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:33:02 -0500 Subject: [PATCH 134/138] should preserve positivity --- .../CATKEVerticalDiffusivities.jl | 1 + .../turbulent_kinetic_energy_equation.jl | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 23eb938e2d..da166d0b53 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -353,6 +353,7 @@ end # Implicit TKE dissipation ω_e = dissipation_rate(i, j, k, grid, closure_ij, tracers, diffusivities) + ω_e = ifelse(eⁱʲᵏ > 0, ω_e, zero(grid)) diffusivities.Lᵉ[i, j, k] = - wb_e - ω_e + Q_e end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index f5a74cdbf3..42289b11fe 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -75,8 +75,6 @@ const VITD = VerticallyImplicitTimeDiscretization return ifelse(dissipative_buoyancy_flux, zero(grid), wb) end -@inline dissipation(i, j, k, grid, ::FlavorOfCATKE{<:VITD}, args...) = zero(grid) - @inline function dissipation_length_scaleᶜᶜᶜ(i, j, k, grid, closure::FlavorOfCATKE, surface_buoyancy_flux, S², N², w★) # Convective dissipation length @@ -121,17 +119,17 @@ end # # and thus L = - Cᴰ √e / ℓ . - ω_numerical = 1 / closure.negative_turbulent_kinetic_energy_damping_time_scale - ω_physical = sqrt(abs(eᵢ)) / ℓᴰ + ω = sqrt(abs(eᵢ)) / ℓᴰ - return ifelse(eᵢ < 0, ω_numerical, ω_physical) + return ω end -# Fallbacks for explicit time discretization +# Dissipation: if e is negative treat it implicitly, otherwise explicit (we do not want to subtract +# terms to the diagonal, same treatment as the buoyancy flux) @inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) - eᵢ = @inbounds tracers.e[i, j, k] + eⁱʲᵏ = @inbounds tracers.e[i, j, k] ω = dissipation_rate(i, j, k, grid, closure, tracers, diffusivities) - return ω * eᵢ + return ifelse(eⁱʲᵏ < 0, ω * eᵢ, zero(grid)) end ##### From 9832f00aa2c00aff406ff93e09572b89ce268feb Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:34:55 -0500 Subject: [PATCH 135/138] better comment --- .../turbulent_kinetic_energy_equation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 42289b11fe..1e99e89a01 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -124,8 +124,8 @@ end return ω end -# Dissipation: if e is negative treat it implicitly, otherwise explicit (we do not want to subtract -# terms to the diagonal, same treatment as the buoyancy flux) +# Dissipation: if e is positive treat it implicitly, otherwise we treat it explicitly. +# Here we apply the same treatment as for the buoyancy flux: we do not want to subtract terms to the diagonal @inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) eⁱʲᵏ = @inbounds tracers.e[i, j, k] ω = dissipation_rate(i, j, k, grid, closure, tracers, diffusivities) From 93a493223cd63e292c5c9b6c71a13c12259bceca Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:40:44 -0500 Subject: [PATCH 136/138] correct the sign of implicit w'b' --- .../CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index da166d0b53..040218a247 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -355,7 +355,7 @@ end ω_e = dissipation_rate(i, j, k, grid, closure_ij, tracers, diffusivities) ω_e = ifelse(eⁱʲᵏ > 0, ω_e, zero(grid)) - diffusivities.Lᵉ[i, j, k] = - wb_e - ω_e + Q_e + diffusivities.Lᵉ[i, j, k] = - ω_e + wb_e + Q_e end end From 3e5a0d9b2ba71bb4c7d4490d487167fe9367ca91 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:31:25 -0500 Subject: [PATCH 137/138] bugfix --- .../turbulent_kinetic_energy_equation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 1e99e89a01..a178a76357 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -129,7 +129,7 @@ end @inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) eⁱʲᵏ = @inbounds tracers.e[i, j, k] ω = dissipation_rate(i, j, k, grid, closure, tracers, diffusivities) - return ifelse(eⁱʲᵏ < 0, ω * eᵢ, zero(grid)) + return ifelse(eⁱʲᵏ < 0, ω * eⁱʲᵏ, zero(grid)) end ##### From 5958cb6ab333929263070cd6c5bd9fb0988a33a9 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:02:51 -0500 Subject: [PATCH 138/138] back to fully implicit dissipation --- .../CATKEVerticalDiffusivities.jl | 1 - .../turbulent_kinetic_energy_equation.jl | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl index 040218a247..ed9e513fd2 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/CATKEVerticalDiffusivities.jl @@ -353,7 +353,6 @@ end # Implicit TKE dissipation ω_e = dissipation_rate(i, j, k, grid, closure_ij, tracers, diffusivities) - ω_e = ifelse(eⁱʲᵏ > 0, ω_e, zero(grid)) diffusivities.Lᵉ[i, j, k] = - ω_e + wb_e + Q_e end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index a178a76357..1e8288a555 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -119,19 +119,21 @@ end # # and thus L = - Cᴰ √e / ℓ . - ω = sqrt(abs(eᵢ)) / ℓᴰ + ω_numerical = 1 / closure.negative_turbulent_kinetic_energy_damping_time_scale + ω_physical = sqrt(abs(eᵢ)) / ℓᴰ - return ω + return ifelse(eᵢ < 0, ω_numerical, ω_physical) end -# Dissipation: if e is positive treat it implicitly, otherwise we treat it explicitly. -# Here we apply the same treatment as for the buoyancy flux: we do not want to subtract terms to the diagonal -@inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, buoyancy, diffusivities) - eⁱʲᵏ = @inbounds tracers.e[i, j, k] - ω = dissipation_rate(i, j, k, grid, closure, tracers, diffusivities) - return ifelse(eⁱʲᵏ < 0, ω * eⁱʲᵏ, zero(grid)) +# Fallbacks for explicit time discretization +@inline function dissipation(i, j, k, grid, closure::FlavorOfCATKE, velocities, tracers, args...) + eᵢ = @inbounds tracers.e[i, j, k] + ω = dissipation_rate(i, j, k, grid, closure, velocities, tracers, args...) + return ω * eᵢ end +dissipation(i, j, k, grid, closure::FlavorOfCATKE{<:VITD}, velocities, tracers, args...) = zero(grid) + ##### ##### For closure tuples... #####