From ce3820d74bbe229fed20a9fdc8acdf7d38c948d3 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:36:00 -0500 Subject: [PATCH 01/30] first commit --- src/AbstractOperations/AbstractOperations.jl | 2 +- src/AbstractOperations/binary_operations.jl | 9 ++++ .../conditional_operations.jl | 11 ++++- src/AbstractOperations/derivatives.jl | 9 ++++ src/AbstractOperations/grid_metrics.jl | 5 +++ .../kernel_function_operation.jl | 5 +++ src/AbstractOperations/multiary_operations.jl | 7 ++++ src/AbstractOperations/unary_operations.jl | 6 +++ src/Advection/Advection.jl | 3 +- src/Advection/centered_reconstruction.jl | 6 +++ src/Advection/reconstruction_coefficients.jl | 8 ++-- src/Advection/stretched_weno_smoothness.jl | 4 +- src/Advection/upwind_biased_reconstruction.jl | 7 ++++ src/Advection/vector_invariant_advection.jl | 8 ++++ src/Advection/vector_invariant_upwinding.jl | 1 - src/Advection/weno_reconstruction.jl | 8 ++++ src/Architectures.jl | 42 +++++++++---------- src/BoundaryConditions/boundary_condition.jl | 6 +++ .../continuous_boundary_function.jl | 7 ++++ .../discrete_boundary_function.jl | 3 ++ .../distributed_architectures.jl | 4 +- .../distributed_fields.jl | 2 +- .../partition_assemble.jl | 10 ++--- src/Fields/Fields.jl | 2 +- src/Fields/field.jl | 14 +++++++ src/Fields/field_boundary_buffers.jl | 34 ++++++++++----- src/Fields/function_field.jl | 7 ++++ src/Fields/regridding_fields.jl | 2 +- src/Fields/set!.jl | 4 +- src/Forcings/advective_forcing.jl | 6 +++ src/Forcings/continuous_forcing.jl | 9 ++++ src/Forcings/discrete_forcing.jl | 4 ++ src/Forcings/multiple_forcings.jl | 1 + src/Grids/grid_generation.jl | 12 ++---- src/Grids/grid_utils.jl | 4 +- src/Grids/latitude_longitude_grid.jl | 8 ++-- src/Grids/orthogonal_spherical_shell_grid.jl | 12 +++--- src/Grids/rectilinear_grid.jl | 2 +- src/ImmersedBoundaries/active_cells_map.jl | 8 ++-- .../distributed_immersed_boundaries.jl | 2 +- src/ImmersedBoundaries/grid_fitted_bottom.jl | 2 +- src/ImmersedBoundaries/immersed_reductions.jl | 2 +- src/ImmersedBoundaries/partial_cell_bottom.jl | 2 +- .../matrix_implicit_free_surface_solver.jl | 12 +++--- .../multi_region_boundary_conditions.jl | 2 +- src/MultiRegion/multi_region_grid.jl | 4 +- src/MultiRegion/x_partitions.jl | 8 ++-- src/MultiRegion/y_partitions.jl | 8 ++-- src/OutputReaders/field_time_series.jl | 4 +- .../field_time_series_indexing.jl | 2 +- src/Solvers/Solvers.jl | 2 +- src/Solvers/batched_tridiagonal_solver.jl | 6 +-- src/Solvers/discrete_transforms.jl | 4 +- src/Solvers/fft_based_poisson_solver.jl | 8 ++-- .../fourier_tridiagonal_poisson_solver.jl | 12 +++--- src/Solvers/heptadiagonal_iterative_solver.jl | 18 ++++---- src/Solvers/sparse_preconditioners.jl | 6 +-- .../implicit_explicit_time_discretization.jl | 2 +- ...vective_adjustment_vertical_diffusivity.jl | 2 +- .../isopycnal_skew_symmetric_diffusivity.jl | 2 +- .../ri_based_vertical_diffusivity.jl | 2 +- test/test_abstract_operations.jl | 8 ++-- test/test_broadcasting.jl | 2 +- test/test_field.jl | 18 ++++---- test/test_field_reductions.jl | 2 +- ...static_free_surface_immersed_boundaries.jl | 2 +- ...face_immersed_boundaries_implicit_solve.jl | 4 +- test/test_lagrangian_particle_tracking.jl | 24 +++++------ test/test_matrix_poisson_solver.jl | 18 ++++---- test/test_multi_region_poisson_solver.jl | 4 +- test/test_sewater_density.jl | 2 +- test/test_shallow_water_models.jl | 2 +- validation/barotropic_gyre/barotropic_gyre.jl | 2 +- ...multi_region_near_global_quarter_degree.jl | 4 +- ...ear_global_shallow_water_quarter_degree.jl | 6 +-- 75 files changed, 327 insertions(+), 185 deletions(-) diff --git a/src/AbstractOperations/AbstractOperations.jl b/src/AbstractOperations/AbstractOperations.jl index f662d98899..9656716307 100644 --- a/src/AbstractOperations/AbstractOperations.jl +++ b/src/AbstractOperations/AbstractOperations.jl @@ -22,7 +22,7 @@ using Oceananigans.Operators: interpolation_operator using Oceananigans.Architectures: device using Oceananigans: AbstractModel -import Oceananigans.Architectures: architecture +import Oceananigans.Architectures: architecture, on_architecture import Oceananigans.BoundaryConditions: fill_halo_regions! import Oceananigans.Fields: compute_at!, indices diff --git a/src/AbstractOperations/binary_operations.jl b/src/AbstractOperations/binary_operations.jl index 2414a1c19c..31a0a80245 100644 --- a/src/AbstractOperations/binary_operations.jl +++ b/src/AbstractOperations/binary_operations.jl @@ -216,3 +216,12 @@ Adapt.adapt_structure(to, binary::BinaryOperation{LX, LY, LZ}) where {LX, LY, LZ Adapt.adapt(to, binary.▶a), Adapt.adapt(to, binary.▶b), Adapt.adapt(to, binary.grid)) + + +on_architecture(to, binary::BinaryOperation{LX, LY, LZ}) where {LX, LY, LZ} = + BinaryOperation{LX, LY, LZ}(on_architecture(to, binary.op), + on_architecture(to, binary.a), + on_architecture(to, binary.b), + on_architecture(to, binary.▶a), + on_architecture(to, binary.▶b), + on_architecture(to, binary.grid)) diff --git a/src/AbstractOperations/conditional_operations.jl b/src/AbstractOperations/conditional_operations.jl index cf183c7f29..e621bcd262 100644 --- a/src/AbstractOperations/conditional_operations.jl +++ b/src/AbstractOperations/conditional_operations.jl @@ -1,6 +1,6 @@ using Oceananigans.Fields: OneField using Oceananigans.Grids: architecture -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture import Oceananigans.Fields: condition_operand, conditional_length, set!, compute_at!, indices # For conditional reductions such as mean(u * v, condition = u .> 0)) @@ -106,7 +106,7 @@ end @inline condition_operand(func::Function, op::AbstractField, ::Nothing, mask) = ConditionalOperation(op; func, condition=TrueCondition(), mask) @inline function condition_operand(func::Function, operand::AbstractField, condition::AbstractArray, mask) - condition = arch_array(architecture(operand.grid), condition) + condition = on_architecture(architecture(operand.grid), condition) return ConditionalOperation(operand; func, condition, mask) end @@ -134,6 +134,13 @@ Adapt.adapt_structure(to, c::ConditionalOperation{LX, LY, LZ}) where {LX, LY, LZ adapt(to, c.condition), adapt(to, c.mask)) +on_architecture(to, c::ConditionalOperation{LX, LY, LZ}) where {LX, LY, LZ} = + ConditionalOperation{LX, LY, LZ}(on_architecture(to, c.operand), + on_architecture(to, c.func), + on_architecture(to, c.grid), + on_architecture(to, c.condition), + on_architecture(to, c.mask)) + Base.summary(c::ConditionalOperation) = string("ConditionalOperation of ", summary(c.operand), " with condition ", summary(c.condition)) compute_at!(c::ConditionalOperation, time) = compute_at!(c.operand, time) diff --git a/src/AbstractOperations/derivatives.jl b/src/AbstractOperations/derivatives.jl index 344ecd47de..751e794b94 100644 --- a/src/AbstractOperations/derivatives.jl +++ b/src/AbstractOperations/derivatives.jl @@ -125,3 +125,12 @@ Adapt.adapt_structure(to, deriv::Derivative{LX, LY, LZ}) where {LX, LY, LZ} = Adapt.adapt(to, deriv.▶), nothing, Adapt.adapt(to, deriv.grid)) + +"Adapt `Derivative` to work on the GPU via CUDAnative and CUDAdrv." +on_architecture(to, deriv::Derivative{LX, LY, LZ}) where {LX, LY, LZ} = + Derivative{LX, LY, LZ}(on_architecture(to, deriv.∂), + on_architecture(to, deriv.arg), + on_architecture(to, deriv.▶), + deriv.abstract_∂, + on_architecture(to, deriv.grid)) + \ No newline at end of file diff --git a/src/AbstractOperations/grid_metrics.jl b/src/AbstractOperations/grid_metrics.jl index a6eb5b8728..822c6df173 100644 --- a/src/AbstractOperations/grid_metrics.jl +++ b/src/AbstractOperations/grid_metrics.jl @@ -130,6 +130,11 @@ Adapt.adapt_structure(to, gm::GridMetricOperation{LX, LY, LZ}) where {LX, LY, LZ GridMetricOperation{LX, LY, LZ}(Adapt.adapt(to, gm.metric), Adapt.adapt(to, gm.grid)) +on_architecture(to, gm::GridMetricOperation{LX, LY, LZ}) where {LX, LY, LZ} = + GridMetricOperation{LX, LY, LZ}(on_architecture(to, gm.metric), + on_architecture(to, gm.grid)) + + @inline Base.getindex(gm::GridMetricOperation, i, j, k) = gm.metric(i, j, k, gm.grid) indices(::GridMetricOperation) = default_indices(3) diff --git a/src/AbstractOperations/kernel_function_operation.jl b/src/AbstractOperations/kernel_function_operation.jl index 821040a920..dbd89365ac 100644 --- a/src/AbstractOperations/kernel_function_operation.jl +++ b/src/AbstractOperations/kernel_function_operation.jl @@ -80,6 +80,11 @@ Adapt.adapt_structure(to, κ::KernelFunctionOperation{LX, LY, LZ}) where {LX, LY Adapt.adapt(to, κ.grid), Tuple(Adapt.adapt(to, a) for a in κ.arguments)...) +on_architecture(to, κ::KernelFunctionOperation{LX, LY, LZ}) where {LX, LY, LZ} = + KernelFunctionOperation{LX, LY, LZ}(on_architecture(to, κ.kernel_function), + on_architecture(to, κ.grid), + Tuple(on_architecture(to, a) for a in κ.arguments)...) + Base.show(io::IO, kfo::KernelFunctionOperation) = print(io, summary(kfo), '\n', diff --git a/src/AbstractOperations/multiary_operations.jl b/src/AbstractOperations/multiary_operations.jl index 3a05addc30..b09168c4d0 100644 --- a/src/AbstractOperations/multiary_operations.jl +++ b/src/AbstractOperations/multiary_operations.jl @@ -150,3 +150,10 @@ Adapt.adapt_structure(to, multiary::MultiaryOperation{LX, LY, LZ}) where {LX, LY Adapt.adapt(to, multiary.args), Adapt.adapt(to, multiary.▶), Adapt.adapt(to, multiary.grid)) + +on_architecture(to, multiary::MultiaryOperation{LX, LY, LZ}) where {LX, LY, LZ} = + MultiaryOperation{LX, LY, LZ}(on_architecture(to, multiary.op), + on_architecture(to, multiary.args), + on_architecture(to, multiary.▶), + on_architecture(to, multiary.grid)) + \ No newline at end of file diff --git a/src/AbstractOperations/unary_operations.jl b/src/AbstractOperations/unary_operations.jl index 9670339c25..7c917a2132 100644 --- a/src/AbstractOperations/unary_operations.jl +++ b/src/AbstractOperations/unary_operations.jl @@ -130,3 +130,9 @@ Adapt.adapt_structure(to, unary::UnaryOperation{LX, LY, LZ}) where {LX, LY, LZ} Adapt.adapt(to, unary.arg), Adapt.adapt(to, unary.▶), Adapt.adapt(to, unary.grid)) + +on_architecture(to, unary::UnaryOperation{LX, LY, LZ}) where {LX, LY, LZ} = + UnaryOperation{LX, LY, LZ}(on_architecture(to, unary.op), + on_architecture(to, unary.arg), + on_architecture(to, unary.▶), + on_architecture(to, unary.grid)) diff --git a/src/Advection/Advection.jl b/src/Advection/Advection.jl index c2e091c663..d949070438 100644 --- a/src/Advection/Advection.jl +++ b/src/Advection/Advection.jl @@ -32,12 +32,13 @@ using OffsetArrays using Oceananigans.Grids using Oceananigans.Grids: with_halo, coordinates -using Oceananigans.Architectures: arch_array, architecture, CPU +using Oceananigans.Architectures: on_architecture, architecture, CPU using Oceananigans.Operators import Base: show, summary import Oceananigans.Grids: required_halo_size +import Oceananigans.Architectures: on_architecture abstract type AbstractAdvectionScheme{B, FT} end abstract type AbstractCenteredAdvectionScheme{B, FT} <: AbstractAdvectionScheme{B, FT} end diff --git a/src/Advection/centered_reconstruction.jl b/src/Advection/centered_reconstruction.jl index 40cf60203b..b6f5010f80 100644 --- a/src/Advection/centered_reconstruction.jl +++ b/src/Advection/centered_reconstruction.jl @@ -76,6 +76,12 @@ Adapt.adapt_structure(to, scheme::Centered{N, FT}) where {N, FT} = Adapt.adapt(to, scheme.coeff_zᵃᵃᶠ), Adapt.adapt(to, scheme.coeff_zᵃᵃᶜ), Adapt.adapt(to, scheme.buffer_scheme)) +on_architecture(to, scheme::Centered{N, FT}) where {N, FT} = + Centered{N, FT}(on_architecture(to, scheme.coeff_xᶠᵃᵃ), on_architecture(to, scheme.coeff_xᶜᵃᵃ), + on_architecture(to, scheme.coeff_yᵃᶠᵃ), on_architecture(to, scheme.coeff_yᵃᶜᵃ), + on_architecture(to, scheme.coeff_zᵃᵃᶠ), on_architecture(to, scheme.coeff_zᵃᵃᶜ), + on_architecture(to, scheme.buffer_scheme)) + # Useful aliases Centered(grid, FT::DataType=Float64; kwargs...) = Centered(FT; grid, kwargs...) diff --git a/src/Advection/reconstruction_coefficients.jl b/src/Advection/reconstruction_coefficients.jl index e0c72a88f4..adcf18c925 100644 --- a/src/Advection/reconstruction_coefficients.jl +++ b/src/Advection/reconstruction_coefficients.jl @@ -243,7 +243,7 @@ end # Stretched reconstruction coefficients for `Centered` schemes @inline function calc_reconstruction_coefficients(FT, coord, arch, N, ::Val{1}; order) - cpu_coord = arch_array(CPU(), coord) + cpu_coord = on_architecture(CPU(), coord) r = ((order + 1) ÷ 2) - 1 s = create_reconstruction_coefficients(FT, r, cpu_coord, arch, N; order) return s @@ -251,7 +251,7 @@ end # Stretched reconstruction coefficients for `UpwindBiased` schemes @inline function calc_reconstruction_coefficients(FT, coord, arch, N, ::Val{2}; order) - cpu_coord = arch_array(CPU(), coord) + cpu_coord = on_architecture(CPU(), coord) rleft = ((order + 1) ÷ 2) - 2 rright = ((order + 1) ÷ 2) - 1 s = [] @@ -264,7 +264,7 @@ end # Stretched reconstruction coefficients for `WENO` schemes @inline function calc_reconstruction_coefficients(FT, coord, arch, N, ::Val{3}; order) - cpu_coord = arch_array(CPU(), coord) + cpu_coord = on_architecture(CPU(), coord) s = [] for r in -1:order-1 push!(s, create_reconstruction_coefficients(FT, r, cpu_coord, arch, N; order)) @@ -280,5 +280,5 @@ end push!(stencil, stencil_coefficients(i, r, cpu_coord, cpu_coord; order)) end end - return OffsetArray(arch_array(arch, stencil), -1) + return OffsetArray(on_architecture(arch, stencil), -1) end diff --git a/src/Advection/stretched_weno_smoothness.jl b/src/Advection/stretched_weno_smoothness.jl index 3b3c49bac8..ec37b87b6c 100644 --- a/src/Advection/stretched_weno_smoothness.jl +++ b/src/Advection/stretched_weno_smoothness.jl @@ -61,7 +61,7 @@ end function calc_smoothness_coefficients(FT, beta, coord, arch, N; order) - cpu_coord = arch_array(CPU(), coord) + cpu_coord = on_architecture(CPU(), coord) order == 3 || throw(ArgumentError("The stretched smoothness coefficients are only implemented for order == 3")) @@ -104,7 +104,7 @@ function create_smoothness_coefficients(FT, r, op, cpu_coord, arch, N; order) end end - return OffsetArray(arch_array(arch, stencil), -1) + return OffsetArray(on_architecture(arch, stencil), -1) end @inline dagger(ψ) = (ψ[2], ψ[3], ψ[1]) diff --git a/src/Advection/upwind_biased_reconstruction.jl b/src/Advection/upwind_biased_reconstruction.jl index 309709bcbc..e7b6955cc7 100644 --- a/src/Advection/upwind_biased_reconstruction.jl +++ b/src/Advection/upwind_biased_reconstruction.jl @@ -85,6 +85,13 @@ Adapt.adapt_structure(to, scheme::UpwindBiased{N, FT}) where {N, FT} = Adapt.adapt(to, scheme.buffer_scheme), Adapt.adapt(to, scheme.advecting_velocity_scheme)) +on_architecture(to, scheme::UpwindBiased{N, FT}) where {N, FT} = + UpwindBiased{N, FT}(on_architecture(to, scheme.coeff_xᶠᵃᵃ), on_architecture(to, scheme.coeff_xᶜᵃᵃ), + on_architecture(to, scheme.coeff_yᵃᶠᵃ), on_architecture(to, scheme.coeff_yᵃᶜᵃ), + on_architecture(to, scheme.coeff_zᵃᵃᶠ), on_architecture(to, scheme.coeff_zᵃᵃᶜ), + on_architecture(to, scheme.buffer_scheme), + on_architecture(to, scheme.advecting_velocity_scheme)) + # Useful aliases UpwindBiased(grid, FT::DataType=Float64; kwargs...) = UpwindBiased(FT; grid, kwargs...) diff --git a/src/Advection/vector_invariant_advection.jl b/src/Advection/vector_invariant_advection.jl index 7530e1788c..537dcf6eb3 100644 --- a/src/Advection/vector_invariant_advection.jl +++ b/src/Advection/vector_invariant_advection.jl @@ -229,6 +229,14 @@ Adapt.adapt_structure(to, scheme::VectorInvariant{N, FT, M}) where {N, FT, M} = Adapt.adapt(to, scheme.divergence_scheme), Adapt.adapt(to, scheme.upwinding)) +on_architecture(to, scheme::VectorInvariant{N, FT, M}) where {N, FT, M} = + VectorInvariant{N, FT, M}(on_architecture(to, scheme.vorticity_scheme), + on_architecture(to, scheme.vorticity_stencil), + on_architecture(to, scheme.vertical_scheme), + on_architecture(to, scheme.kinetic_energy_gradient_scheme), + on_architecture(to, scheme.divergence_scheme), + on_architecture(to, scheme.upwinding)) + @inline U_dot_∇u(i, j, k, grid, scheme::VectorInvariant, U) = horizontal_advection_U(i, j, k, grid, scheme, U.u, U.v) + vertical_advection_U(i, j, k, grid, scheme, U) + bernoulli_head_U(i, j, k, grid, scheme, U.u, U.v) diff --git a/src/Advection/vector_invariant_upwinding.jl b/src/Advection/vector_invariant_upwinding.jl index 02b1a4d4cb..b2b24470a0 100644 --- a/src/Advection/vector_invariant_upwinding.jl +++ b/src/Advection/vector_invariant_upwinding.jl @@ -140,7 +140,6 @@ Adapt.adapt_structure(to, scheme::CrossAndSelfUpwinding) = Adapt.adapt(to, scheme.δu²_stencil), Adapt.adapt(to, scheme.δv²_stencil)) - Base.show(io::IO, a::VelocityUpwinding) = print(io, summary(a), " \n", "KE gradient and Divergence flux cross terms reconstruction: ", "\n", diff --git a/src/Advection/weno_reconstruction.jl b/src/Advection/weno_reconstruction.jl index 2087794674..0ad255a68d 100644 --- a/src/Advection/weno_reconstruction.jl +++ b/src/Advection/weno_reconstruction.jl @@ -160,6 +160,14 @@ Adapt.adapt_structure(to, scheme::WENO{N, FT, XT, YT, ZT, WF, PP}) where {N, FT, Adapt.adapt(to, scheme.buffer_scheme), Adapt.adapt(to, scheme.advecting_velocity_scheme)) +on_architecture(to, scheme::WENO{N, FT, XT, YT, ZT, WF, PP}) where {N, FT, XT, YT, ZT, WF, PP} = + WENO{N, FT, WF}(on_architecture(to, scheme.coeff_xᶠᵃᵃ), on_architecture(to, scheme.coeff_xᶜᵃᵃ), + on_architecture(to, scheme.coeff_yᵃᶠᵃ), on_architecture(to, scheme.coeff_yᵃᶜᵃ), + on_architecture(to, scheme.coeff_zᵃᵃᶠ), on_architecture(to, scheme.coeff_zᵃᵃᶜ), + on_architecture(to, scheme.bounds), + on_architecture(to, scheme.buffer_scheme), + on_architecture(to, scheme.advecting_velocity_scheme)) + # Retrieve precomputed coefficients (+2 for julia's 1 based indices) @inline retrieve_coeff(scheme::WENO, r, ::Val{1}, i, ::Type{Face}) = @inbounds scheme.coeff_xᶠᵃᵃ[r+2][i] @inline retrieve_coeff(scheme::WENO, r, ::Val{1}, i, ::Type{Center}) = @inbounds scheme.coeff_xᶜᵃᵃ[r+2][i] diff --git a/src/Architectures.jl b/src/Architectures.jl index 78b55a067a..aebc19f4bb 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -2,7 +2,7 @@ module Architectures export AbstractArchitecture export CPU, GPU, MultiGPU -export device, architecture, array_type, arch_array, unified_array, device_copy_to! +export device, architecture, array_type, on_architecture, unified_array, device_copy_to! using CUDA using KernelAbstractions @@ -56,32 +56,32 @@ child_architecture(arch) = arch array_type(::CPU) = Array array_type(::GPU) = CuArray -arch_array(::CPU, a::Array) = a -arch_array(::CPU, a::CuArray) = Array(a) -arch_array(::GPU, a::Array) = CuArray(a) -arch_array(::GPU, a::CuArray) = a +on_architecture(::CPU, a::Array) = a +on_architecture(::CPU, a::CuArray) = Array(a) +on_architecture(::GPU, a::Array) = CuArray(a) +on_architecture(::GPU, a::CuArray) = a -arch_array(::CPU, a::BitArray) = a -arch_array(::GPU, a::BitArray) = CuArray(a) +on_architecture(::CPU, a::BitArray) = a +on_architecture(::GPU, a::BitArray) = CuArray(a) -arch_array(::GPU, a::SubArray{<:Any, <:Any, <:CuArray}) = a -arch_array(::CPU, a::SubArray{<:Any, <:Any, <:CuArray}) = Array(a) +on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:CuArray}) = a +on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:CuArray}) = Array(a) -arch_array(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) -arch_array(::CPU, a::SubArray{<:Any, <:Any, <:Array}) = a +on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) +on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:Array}) = a -arch_array(::CPU, a::AbstractRange) = a -arch_array(::CPU, ::Nothing) = nothing -arch_array(::CPU, a::Number) = a -arch_array(::CPU, a::Function) = a +on_architecture(::CPU, a::AbstractRange) = a +on_architecture(::CPU, ::Nothing) = nothing +on_architecture(::CPU, a::Number) = a +on_architecture(::CPU, a::Function) = a -arch_array(::GPU, a::AbstractRange) = a -arch_array(::GPU, ::Nothing) = nothing -arch_array(::GPU, a::Number) = a -arch_array(::GPU, a::Function) = a +on_architecture(::GPU, a::AbstractRange) = a +on_architecture(::GPU, ::Nothing) = nothing +on_architecture(::GPU, a::Number) = a +on_architecture(::GPU, a::Function) = a -arch_array(arch::CPU, a::OffsetArray) = OffsetArray(arch_array(arch, a.parent), a.offsets...) -arch_array(arch::GPU, a::OffsetArray) = OffsetArray(arch_array(arch, a.parent), a.offsets...) +on_architecture(arch::CPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) +on_architecture(arch::GPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) cpu_architecture(::CPU) = CPU() cpu_architecture(::GPU) = CPU() diff --git a/src/BoundaryConditions/boundary_condition.jl b/src/BoundaryConditions/boundary_condition.jl index 1e59c8f0be..86a81cbf25 100644 --- a/src/BoundaryConditions/boundary_condition.jl +++ b/src/BoundaryConditions/boundary_condition.jl @@ -1,4 +1,5 @@ import Adapt +import Oceananigans.Architectures: on_architecture """ struct BoundaryCondition{C<:AbstractBoundaryConditionClassification, T} @@ -69,6 +70,11 @@ end Adapt.adapt_structure(to, b::BoundaryCondition{Classification}) where Classification = BoundaryCondition(Classification(), Adapt.adapt(to, b.condition)) + +# Adapt boundary condition struct to be GPU friendly and passable to GPU kernels. +on_architecture(to, b::BoundaryCondition{Classification}) where Classification = + BoundaryCondition(Classification(), on_architecture(to, b.condition)) + ##### ##### Some abbreviations to make life easier. ##### diff --git a/src/BoundaryConditions/continuous_boundary_function.jl b/src/BoundaryConditions/continuous_boundary_function.jl index dcc4dbeea7..3023dbd2ac 100644 --- a/src/BoundaryConditions/continuous_boundary_function.jl +++ b/src/BoundaryConditions/continuous_boundary_function.jl @@ -217,3 +217,10 @@ Adapt.adapt_structure(to, bf::ContinuousBoundaryFunction{LX, LY, LZ, S}) where { nothing, Adapt.adapt(to, bf.field_dependencies_indices), Adapt.adapt(to, bf.field_dependencies_interp)) + +on_architecture(to, bf::ContinuousBoundaryFunction{LX, LY, LZ, S}) where {LX, LY, LZ, S} = + ContinuousBoundaryFunction{LX, LY, LZ, S}(on_architecture(to, bf.func), + on_architecture(to, bf.parameters), + on_architecture(to, bf.field_dependencies), + on_architecture(to, bf.field_dependencies_indices), + on_architecture(to, bf.field_dependencies_interp)) diff --git a/src/BoundaryConditions/discrete_boundary_function.jl b/src/BoundaryConditions/discrete_boundary_function.jl index 65ebf70323..39be642726 100644 --- a/src/BoundaryConditions/discrete_boundary_function.jl +++ b/src/BoundaryConditions/discrete_boundary_function.jl @@ -57,3 +57,6 @@ Base.summary(bf::DiscreteBoundaryFunction) = string("DiscreteBoundaryFunction ", Adapt.adapt_structure(to, bf::DiscreteBoundaryFunction) = DiscreteBoundaryFunction(Adapt.adapt(to, bf.func), Adapt.adapt(to, bf.parameters)) +on_architecture(to, bf::DiscreteBoundaryFunction) = DiscreteBoundaryFunction(on_architecture(to, bf.func), + on_architecture(to, bf.parameters)) + diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 2aa2a53e45..7284ae9b66 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -2,7 +2,7 @@ using Oceananigans.Architectures using Oceananigans.Grids: topology, validate_tupled_argument using CUDA: ndevices, device! -import Oceananigans.Architectures: device, cpu_architecture, arch_array, array_type, child_architecture, convert_args +import Oceananigans.Architectures: device, cpu_architecture, on_architecture, array_type, child_architecture, convert_args import Oceananigans.Grids: zeros import Oceananigans.Utils: sync_device!, tupleit @@ -261,7 +261,7 @@ const SynchronizedDistributed = Distributed{<:Any, true} child_architecture(arch::Distributed) = arch.child_architecture device(arch::Distributed) = device(child_architecture(arch)) -arch_array(arch::Distributed, A) = arch_array(child_architecture(arch), A) +on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) zeros(FT, arch::Distributed, N...) = zeros(FT, child_architecture(arch), N...) array_type(arch::Distributed) = array_type(child_architecture(arch)) sync_device!(arch::Distributed) = sync_device!(arch.child_architecture) diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index 878646662e..62b7fca628 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -45,7 +45,7 @@ function set!(u::DistributedField, v::Union{Array, CuArray}) return u else try - f = arch_array(architecture(u), v) + f = on_architecture(architecture(u), v) u .= f return u diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index c8b90e3cbb..d381743cb1 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -1,4 +1,4 @@ -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture all_reduce(op, val, arch::Distributed) = MPI.Allreduce(val, op, arch.communicator) @@ -116,7 +116,7 @@ partition_global_array(arch, c_global::Function, n) = c_global # Here we assume that we cannot partition in z (we should remove support for that) function partition_global_array(arch::Distributed, c_global::AbstractArray, n) - c_global = arch_array(CPU(), c_global) + c_global = on_architecture(CPU(), c_global) ri, rj, rk = arch.local_index @@ -137,7 +137,7 @@ function partition_global_array(arch::Distributed, c_global::AbstractArray, n) 1 + sum(ny[1:rj-1]) : sum(ny[1:rj]), 1:nz] end - return arch_array(child_architecture(arch), c_local) + return on_architecture(child_architecture(arch), c_local) end """ @@ -151,7 +151,7 @@ construct_global_array(arch, c_local::Function, N) = c_local # TODO: This does not work for 3D parallelizations!!! function construct_global_array(arch::Distributed, c_local::AbstractArray, n) - c_local = arch_array(CPU(), c_local) + c_local = on_architecture(CPU(), c_local) ri, rj, rk = arch.local_index @@ -180,5 +180,5 @@ function construct_global_array(arch::Distributed, c_local::AbstractArray, n) MPI.Allreduce!(c_global, +, arch.communicator) end - return arch_array(child_architecture(arch), c_global) + return on_architecture(child_architecture(arch), c_global) end diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index da749d6556..b974f00700 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -34,7 +34,7 @@ Build a field from `a` at `loc` and on `grid`. """ @inline function field(loc, a::AbstractArray, grid) f = Field(loc, grid) - a = arch_array(architecture(grid), a) + a = on_architecture(architecture(grid), a) try copyto!(parent(f), a) catch diff --git a/src/Fields/field.jl b/src/Fields/field.jl index 9337523cea..29f99af6dc 100644 --- a/src/Fields/field.jl +++ b/src/Fields/field.jl @@ -5,6 +5,7 @@ using Adapt using KernelAbstractions: @kernel, @index using Base: @propagate_inbounds +import Oceananigans.Architectures: on_architecture import Oceananigans.BoundaryConditions: fill_halo_regions!, getbc import Statistics: norm, mean, mean! import Base: == @@ -413,6 +414,19 @@ total_size(f::Field) = total_size(f.grid, location(f), f.indices) ==(a, f::Field) = a == interior(f) ==(a::Field, b::Field) = interior(a) == interior(b) +##### +##### Move Fields between architectures +##### + +on_architecture(arch, field::AbstractField) = + Field{LX, LY, LZ}(on_architecture(arch, field.grid), + on_architecture(arch, data), + on_architecture(arch, bcs), + on_architecture(arch, indices), + on_architecture(arch, op), + on_architecture(arch, status), + on_architecture(arch, buffers)) + ##### ##### Interface for field computations ##### diff --git a/src/Fields/field_boundary_buffers.jl b/src/Fields/field_boundary_buffers.jl index f11b2e9558..e514a3be3e 100644 --- a/src/Fields/field_boundary_buffers.jl +++ b/src/Fields/field_boundary_buffers.jl @@ -1,9 +1,11 @@ using Oceananigans.BoundaryConditions: MCBC, DCBC -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.Grids: halo_size, size using Oceananigans.Utils: launch! using KernelAbstractions: @kernel, @index +import Oceananigans.Architectures: on_architecture + struct FieldBoundaryBuffers{W, E, S, N, SW, SE, NW, NE} west :: W east :: E @@ -61,31 +63,31 @@ create_buffer_corner(arch, grid, data, Hx, Hy, ::Nothing, edge2) = nothing create_buffer_corner(arch, grid, data, Hx, Hy, ::Nothing, ::Nothing) = nothing function create_buffer_corner(arch, grid, data, Hx, Hy, edge1, edge2) - return (send = arch_array(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))), - recv = arch_array(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3)))) + return (send = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))), + recv = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3)))) end function create_buffer_x(arch, grid, data, H, ::DCBC) # Either we pass corners or it is a 1D parallelization in x size_y = arch.ranks[2] == 1 ? size(parent(data), 2) : size(grid, 2) - return (send = arch_array(arch, zeros(eltype(data), H, size_y, size(parent(data), 3))), - recv = arch_array(arch, zeros(eltype(data), H, size_y, size(parent(data), 3)))) + return (send = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3))), + recv = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3)))) end function create_buffer_y(arch, grid, data, H, ::DCBC) # Either we pass corners or it is a 1D parallelization in y size_x = arch.ranks[1] == 1 ? size(parent(data), 1) : size(grid, 1) - return (send = arch_array(arch, zeros(eltype(data), size_x, H, size(parent(data), 3))), - recv = arch_array(arch, zeros(eltype(data), size_x, H, size(parent(data), 3)))) + return (send = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3))), + recv = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3)))) end create_buffer_x(arch, grid, data, H, ::MCBC) = - (send = arch_array(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))), - recv = arch_array(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3)))) + (send = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))), + recv = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3)))) create_buffer_y(arch, grid, data, H, ::MCBC) = - (send = arch_array(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))), - recv = arch_array(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3)))) + (send = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))), + recv = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3)))) Adapt.adapt_structure(to, buff::FieldBoundaryBuffers) = FieldBoundaryBuffers(Adapt.adapt(to, buff.west), @@ -97,6 +99,16 @@ Adapt.adapt_structure(to, buff::FieldBoundaryBuffers) = Adapt.adapt(to, buff.northwest), Adapt.adapt(to, buff.northeast)) +on_architecture(arch, buff::FieldBoundaryBuffers) = + FieldBoundaryBuffers(on_architecture(arch, buff.west), + on_architecture(arch, buff.east), + on_architecture(arch, buff.north), + on_architecture(arch, buff.south), + on_architecture(arch, buff.southwest), + on_architecture(arch, buff.southeast), + on_architecture(arch, buff.northwest), + on_architecture(arch, buff.northeast)) + """ fill_send_buffers!(c::OffsetArray, buffers::FieldBoundaryBuffers, grid) diff --git a/src/Fields/function_field.jl b/src/Fields/function_field.jl index 39a3a78eab..0a35dfd0af 100644 --- a/src/Fields/function_field.jl +++ b/src/Fields/function_field.jl @@ -72,6 +72,13 @@ Adapt.adapt_structure(to, f::FunctionField{LX, LY, LZ}) where {LX, LY, LZ} = clock = Adapt.adapt(to, f.clock), parameters = Adapt.adapt(to, f.parameters)) + +on_architecture(to, f::FunctionField{LX, LY, LZ}) where {LX, LY, LZ} = + FunctionField{LX, LY, LZ}(on_architecture(to, f.func), + on_architecture(to, f.grid), + clock = on_architecture(to, f.clock), + parameters = on_architecture(to, f.parameters)) + Base.show(io::IO, field::FunctionField) = print(io, "FunctionField located at ", show_location(field), "\n", "├── func: $(prettysummary(field.func))", "\n", diff --git a/src/Fields/regridding_fields.jl b/src/Fields/regridding_fields.jl index a23c6e6a48..758257f369 100644 --- a/src/Fields/regridding_fields.jl +++ b/src/Fields/regridding_fields.jl @@ -1,6 +1,6 @@ using KernelAbstractions: @kernel, @index -using Oceananigans.Architectures: arch_array, architecture +using Oceananigans.Architectures: on_architecture, architecture using Oceananigans.Operators: Δzᶜᶜᶜ, Δyᶜᶜᶜ, Δxᶜᶜᶜ, Azᶜᶜᶜ using Oceananigans.Grids: hack_sind, ξnode, ηnode, rnode diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index f6e8a8eb78..f2e89a7817 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -70,7 +70,7 @@ function set!(u::Field, f::Function) end function set!(u::Field, f::Union{Array, CuArray, OffsetArray}) - f = arch_array(architecture(u), f) + f = on_architecture(architecture(u), f) u .= f return u end @@ -91,7 +91,7 @@ function set!(u::Field, v::Field) interior(u) .= interior(v) end else - v_data = arch_array(architecture(u), v.data) + v_data = on_architecture(architecture(u), v.data) # As above, we permit ourselves a little ambition and try to copy halo data: try diff --git a/src/Forcings/advective_forcing.jl b/src/Forcings/advective_forcing.jl index d1ba1a0394..ecf5191c47 100644 --- a/src/Forcings/advective_forcing.jl +++ b/src/Forcings/advective_forcing.jl @@ -3,6 +3,8 @@ using Oceananigans.Fields: ZeroField, ConstantField using Oceananigans.Utils: SumOfArrays using Adapt +import Oceananigans.Architectures: on_architecture + maybe_constant_field(u) = u maybe_constant_field(u::Number) = ConstantField(u) @@ -69,6 +71,10 @@ end Adapt.adapt_structure(to, af::AdvectiveForcing) = AdvectiveForcing(adapt(to, af.u), adapt(to, af.v), adapt(to, af.w)) +on_architecture(to, af::AdvectiveForcing) = + AdvectiveForcing(on_architecture(to, af.u), on_architecture(to, af.v), on_architecture(to, af.w)) + + # fallback @inline with_advective_forcing(forcing, total_velocities) = total_velocities diff --git a/src/Forcings/continuous_forcing.jl b/src/Forcings/continuous_forcing.jl index f9ee124754..6f3283358e 100644 --- a/src/Forcings/continuous_forcing.jl +++ b/src/Forcings/continuous_forcing.jl @@ -5,6 +5,8 @@ using Oceananigans.Operators: assumed_field_location, index_and_interp_dependenc using Oceananigans.Fields: show_location using Oceananigans.Utils: user_function_arguments, tupleit, prettysummary +import Oceananigans.Architectures: on_architecture + """ ContinuousForcing{LX, LY, LZ, P, F, D, I, ℑ} @@ -162,3 +164,10 @@ Adapt.adapt_structure(to, forcing::ContinuousForcing{LX, LY, LZ}) where {LX, LY, Adapt.adapt(to, forcing.field_dependencies_indices), Adapt.adapt(to, forcing.field_dependencies_interp)) +on_architecture(to, forcing::ContinuousForcing{LX, LY, LZ}) where {LX, LY, LZ} = + ContinuousForcing{LX, LY, LZ}(on_architecture(to, forcing.func), + on_architecture(to, forcing.parameters), + on_architecture(to, forcing.field_dependencies), + on_architecture(to, forcing.field_dependencies_indices), + on_architecture(to, forcing.field_dependencies_interp)) + diff --git a/src/Forcings/discrete_forcing.jl b/src/Forcings/discrete_forcing.jl index 5105c8f10b..435fc05cd1 100644 --- a/src/Forcings/discrete_forcing.jl +++ b/src/Forcings/discrete_forcing.jl @@ -60,3 +60,7 @@ Base.show(io::IO, forcing::DiscreteForcing{P}) where P = Adapt.adapt_structure(to, forcing::DiscreteForcing) = DiscreteForcing(Adapt.adapt(to, forcing.func), Adapt.adapt(to, forcing.parameters)) + +on_architecture(to, forcing::DiscreteForcing) = + DiscreteForcing(on_architecture(to, forcing.func), + on_architecture(to, forcing.parameters)) diff --git a/src/Forcings/multiple_forcings.jl b/src/Forcings/multiple_forcings.jl index 060e56a911..30123ec36d 100644 --- a/src/Forcings/multiple_forcings.jl +++ b/src/Forcings/multiple_forcings.jl @@ -5,6 +5,7 @@ struct MultipleForcings{N, F} end Adapt.adapt_structure(to, mf::MultipleForcings) = MultipleForcings(adapt(to, mf.forcings)) +on_architecture(to, mf::MultipleForcings) = MultipleForcings(on_architecture(to, mf.forcings)) Base.getindex(mf::MultipleForcings, i) = mf.forcings[i] diff --git a/src/Grids/grid_generation.jl b/src/Grids/grid_generation.jl index 98bd421d43..056c4fb7af 100644 --- a/src/Grids/grid_generation.jl +++ b/src/Grids/grid_generation.jl @@ -1,8 +1,4 @@ # Utilities to generate a grid with the following inputs - -@inline adapt_if_vector(to, var) = var -@inline adapt_if_vector(to, var::AbstractArray) = Adapt.adapt(to, var) - get_domain_extent(::Nothing, N) = (1, 1) get_domain_extent(coord, N) = (coord[1], coord[2]) get_domain_extent(coord::Function, N) = (coord(1), coord(N+1)) @@ -76,15 +72,15 @@ function generate_coordinate(FT, topo::AT, N, H, node_generator, coordinate_name Δᶠ[i] = Δᶠ[i-1] end - Δᶜ = OffsetArray(arch_array(arch, Δᶜ), -H) - Δᶠ = OffsetArray(arch_array(arch, Δᶠ), -H-1) + Δᶜ = OffsetArray(on_architecture(arch, Δᶜ), -H) + Δᶠ = OffsetArray(on_architecture(arch, Δᶠ), -H-1) F = OffsetArray(F, -H) C = OffsetArray(C, -H) # Convert to appropriate array type for arch - F = OffsetArray(arch_array(arch, F.parent), F.offsets...) - C = OffsetArray(arch_array(arch, C.parent), C.offsets...) + F = OffsetArray(on_architecture(arch, F.parent), F.offsets...) + C = OffsetArray(on_architecture(arch, C.parent), C.offsets...) return L, F, C, Δᶠ, Δᶜ end diff --git a/src/Grids/grid_utils.jl b/src/Grids/grid_utils.jl index fbcef761f7..90b34d603f 100644 --- a/src/Grids/grid_utils.jl +++ b/src/Grids/grid_utils.jl @@ -459,7 +459,7 @@ function add_halos(data, loc, topo, sz, halo_sz; warnings=true) arch = architecture(data) # bring to CPU - map(a -> arch_array(CPU(), a), data) + map(a -> on_architecture(CPU(), a), data) nx, ny, nz = total_length(loc[1](), topo[1](), sz[1], 0), total_length(loc[2](), topo[2](), sz[2], 0), @@ -484,7 +484,7 @@ function add_halos(data, loc, topo, sz, halo_sz; warnings=true) offset_array[1:nx, 1:ny, 1:nz] = data[1:nx, 1:ny, 1:nz] # return to data's original architecture - map(a -> arch_array(arch, a), offset_array) + map(a -> on_architecture(arch, a), offset_array) return offset_array end diff --git a/src/Grids/latitude_longitude_grid.jl b/src/Grids/latitude_longitude_grid.jl index f6e2d7a887..3e5fafd730 100644 --- a/src/Grids/latitude_longitude_grid.jl +++ b/src/Grids/latitude_longitude_grid.jl @@ -354,7 +354,7 @@ function on_architecture(new_arch::AbstractArchitecture, old_grid::LatitudeLongi old_grid.Δyᶠᶜᵃ, old_grid.Δyᶜᶠᵃ, old_grid.Azᶠᶜᵃ, old_grid.Azᶜᶠᵃ, old_grid.Azᶠᶠᵃ, old_grid.Azᶜᶜᵃ) - new_properties = Tuple(arch_array(new_arch, p) for p in old_properties) + new_properties = Tuple(on_architecture(new_arch, p) for p in old_properties) TX, TY, TZ = topology(old_grid) @@ -552,7 +552,7 @@ function allocate_metrics(grid::LatitudeLongitudeGrid) for metric in grid_metrics parentM = Symbol(metric, :_parent) @eval $parentM = zeros($FT, $metric_size...) - @eval $metric = OffsetArray(arch_array($arch, $parentM), $offsets...) + @eval $metric = OffsetArray(on_architecture($arch, $parentM), $offsets...) end if grid isa YRegularLLG @@ -561,8 +561,8 @@ function allocate_metrics(grid::LatitudeLongitudeGrid) else parentC = zeros(FT, length(grid.Δφᵃᶜᵃ)) parentF = zeros(FT, length(grid.Δφᵃᶜᵃ)) - Δyᶠᶜ = OffsetArray(arch_array(arch, parentC), grid.Δφᵃᶜᵃ.offsets[1]) - Δyᶜᶠ = OffsetArray(arch_array(arch, parentF), grid.Δφᵃᶜᵃ.offsets[1]) + Δyᶠᶜ = OffsetArray(on_architecture(arch, parentC), grid.Δφᵃᶜᵃ.offsets[1]) + Δyᶜᶠ = OffsetArray(on_architecture(arch, parentF), grid.Δφᵃᶜᵃ.offsets[1]) end return Δxᶠᶜ, Δxᶜᶠ, Δxᶠᶠ, Δxᶜᶜ, Δyᶠᶜ, Δyᶜᶠ, Azᶠᶜ, Azᶜᶠ, Azᶠᶠ, Azᶜᶜ diff --git a/src/Grids/orthogonal_spherical_shell_grid.jl b/src/Grids/orthogonal_spherical_shell_grid.jl index 5d9eb736ae..b8b3f0a723 100644 --- a/src/Grids/orthogonal_spherical_shell_grid.jl +++ b/src/Grids/orthogonal_spherical_shell_grid.jl @@ -589,13 +589,13 @@ function conformal_cubed_sphere_panel(architecture::AbstractArchitecture = CPU() # convert to coordinate_arrays = (λᶜᶜᵃ, λᶠᶜᵃ, λᶜᶠᵃ, λᶠᶠᵃ, φᶜᶜᵃ, φᶠᶜᵃ, φᶜᶠᵃ, φᶠᶠᵃ, zᵃᵃᶜ, zᵃᵃᶠ) - coordinate_arrays = map(a -> arch_array(architecture, a), coordinate_arrays) + coordinate_arrays = map(a -> on_architecture(architecture, a), coordinate_arrays) metric_arrays = (Δxᶜᶜᵃ, Δxᶠᶜᵃ, Δxᶜᶠᵃ, Δxᶠᶠᵃ, Δyᶜᶜᵃ, Δyᶜᶠᵃ, Δyᶠᶜᵃ, Δyᶠᶠᵃ, Δzᵃᵃᶜ, Δzᵃᵃᶠ, Azᶜᶜᵃ, Azᶠᶜᵃ, Azᶜᶠᵃ, Azᶠᶠᵃ) - metric_arrays = map(a -> arch_array(architecture, a), metric_arrays) + metric_arrays = map(a -> on_architecture(architecture, a), metric_arrays) conformal_mapping = (; ξ, η, rotation) @@ -777,7 +777,7 @@ function load_and_offset_cubed_sphere_data(file, FT, arch, field_name, loc, topo ii = interior_indices(loc[1](), topo[1](), N[1]) jj = interior_indices(loc[2](), topo[2](), N[2]) - interior_data = arch_array(arch, file[field_name][ii, jj]) + interior_data = on_architecture(arch, file[field_name][ii, jj]) underlying_data = zeros(FT, arch, total_length(loc[1](), topo[1](), N[1], H[1]), @@ -902,9 +902,9 @@ function on_architecture(arch::AbstractArchitecture, grid::OrthogonalSphericalSh :Azᶜᶠᵃ, :Azᶠᶠᵃ) - grid_spacing_data = Tuple(arch_array(arch, getproperty(grid, name)) for name in grid_spacings) - coordinate_data = Tuple(arch_array(arch, getproperty(grid, name)) for name in coordinates) - horizontal_area_data = Tuple(arch_array(arch, getproperty(grid, name)) for name in horizontal_areas) + grid_spacing_data = Tuple(on_architecture(arch, getproperty(grid, name)) for name in grid_spacings) + coordinate_data = Tuple(on_architecture(arch, getproperty(grid, name)) for name in coordinates) + horizontal_area_data = Tuple(on_architecture(arch, getproperty(grid, name)) for name in horizontal_areas) TX, TY, TZ = topology(grid) diff --git a/src/Grids/rectilinear_grid.jl b/src/Grids/rectilinear_grid.jl index 17eca771b9..99f35afe09 100644 --- a/src/Grids/rectilinear_grid.jl +++ b/src/Grids/rectilinear_grid.jl @@ -399,7 +399,7 @@ function on_architecture(new_arch::AbstractArchitecture, old_grid::RectilinearGr old_grid.Δyᵃᶠᵃ, old_grid.Δyᵃᶜᵃ, old_grid.yᵃᶠᵃ, old_grid.yᵃᶜᵃ, old_grid.Δzᵃᵃᶠ, old_grid.Δzᵃᵃᶜ, old_grid.zᵃᵃᶠ, old_grid.zᵃᵃᶜ) - new_properties = Tuple(arch_array(new_arch, p) for p in old_properties) + new_properties = Tuple(on_architecture(new_arch, p) for p in old_properties) TX, TY, TZ = topology(old_grid) diff --git a/src/ImmersedBoundaries/active_cells_map.jl b/src/ImmersedBoundaries/active_cells_map.jl index dbb4869368..5b4b516dde 100644 --- a/src/ImmersedBoundaries/active_cells_map.jl +++ b/src/ImmersedBoundaries/active_cells_map.jl @@ -162,7 +162,7 @@ function interior_active_indices(ibg; parameters = :xyz) # Cannot findall on the entire field because we incur on OOM errors active_indices = IndicesType[] active_indices = findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) - active_indices = arch_array(architecture(ibg), active_indices) + active_indices = on_architecture(architecture(ibg), active_indices) return active_indices end @@ -173,7 +173,7 @@ end function findall_active_indices!(active_indices, active_cells_field, ibg, IndicesType) for k in 1:size(ibg, 3) - interior_indices = findall(arch_array(CPU(), interior(active_cells_field, :, :, k:k))) + interior_indices = findall(on_architecture(CPU(), interior(active_cells_field, :, :, k:k))) interior_indices = convert_interior_indices(interior_indices, k, IndicesType) active_indices = vcat(active_indices, interior_indices) GC.gc() @@ -234,7 +234,7 @@ end # computation only on active `columns` function map_active_z_columns(ibg) active_cells_field = compute_active_z_columns(ibg) - interior_cells = arch_array(CPU(), interior(active_cells_field, :, :, 1)) + interior_cells = on_architecture(CPU(), interior(active_cells_field, :, :, 1)) full_indices = findall(interior_cells) @@ -243,7 +243,7 @@ function map_active_z_columns(ibg) N = max(Nx, Ny) IntType = N > MAXUInt8 ? (N > MAXUInt16 ? (N > MAXUInt32 ? UInt64 : UInt32) : UInt16) : UInt8 surface_map = getproperty.(full_indices, Ref(:I)) .|> Tuple{IntType, IntType} - surface_map = arch_array(architecture(ibg), surface_map) + surface_map = on_architecture(architecture(ibg), surface_map) return surface_map end diff --git a/src/ImmersedBoundaries/distributed_immersed_boundaries.jl b/src/ImmersedBoundaries/distributed_immersed_boundaries.jl index f524d8f7e4..726edda8be 100644 --- a/src/ImmersedBoundaries/distributed_immersed_boundaries.jl +++ b/src/ImmersedBoundaries/distributed_immersed_boundaries.jl @@ -75,7 +75,7 @@ function resize_immersed_boundary(ib::AbstractGridFittedBottom{<:OffsetArray}, g if any(size(ib.bottom_height) .!= bottom_heigth_size) @warn "Resizing the bottom field to match the grids' halos" bottom_field = Field((Center, Center, Nothing), grid) - cpu_bottom = arch_array(CPU(), ib.bottom_height)[1:Nx, 1:Ny] + cpu_bottom = on_architecture(CPU(), ib.bottom_height)[1:Nx, 1:Ny] set!(bottom_field, cpu_bottom) fill_halo_regions!(bottom_field) offset_bottom_array = dropdims(bottom_field.data, dims=3) diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl index e693e20e2d..a3461ac16d 100644 --- a/src/ImmersedBoundaries/grid_fitted_bottom.jl +++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl @@ -4,7 +4,7 @@ using OffsetArrays: OffsetArray using Oceananigans.Utils: getnamewrapper using Oceananigans.Grids: total_size using Oceananigans.Fields: fill_halo_regions! -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.BoundaryConditions: FBC using Printf diff --git a/src/ImmersedBoundaries/immersed_reductions.jl b/src/ImmersedBoundaries/immersed_reductions.jl index e234f849ae..a9969363c6 100644 --- a/src/ImmersedBoundaries/immersed_reductions.jl +++ b/src/ImmersedBoundaries/immersed_reductions.jl @@ -23,7 +23,7 @@ const IF = AbstractField{<:Any, <:Any, <:Any, <:ImmersedBoundaryGrid} @inline function condition_operand(func::Function, op::IF, cond::AbstractArray, mask) arch = architecture(op.grid) - arch_condition = arch_array(arch, cond) + arch_condition = on_architecture(arch, cond) ni_condition = NotImmersed(arch_condition) return ConditionalOperation(op; func, condition=ni_condition, mask) end diff --git a/src/ImmersedBoundaries/partial_cell_bottom.jl b/src/ImmersedBoundaries/partial_cell_bottom.jl index 608c413679..94fff4cbca 100644 --- a/src/ImmersedBoundaries/partial_cell_bottom.jl +++ b/src/ImmersedBoundaries/partial_cell_bottom.jl @@ -1,6 +1,6 @@ using Oceananigans.Utils: prettysummary using Oceananigans.Fields: fill_halo_regions! -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Printf ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/matrix_implicit_free_surface_solver.jl b/src/Models/HydrostaticFreeSurfaceModels/matrix_implicit_free_surface_solver.jl index 41d0386160..65f4030132 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/matrix_implicit_free_surface_solver.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/matrix_implicit_free_surface_solver.jl @@ -48,7 +48,7 @@ function MatrixImplicitFreeSurfaceSolver(grid::AbstractGrid, settings, gravitati compute_vertically_integrated_lateral_areas!(vertically_integrated_lateral_areas) arch = architecture(grid) - right_hand_side = arch_array(arch, zeros(grid.Nx * grid.Ny)) # linearized RHS for matrix operations + right_hand_side = on_architecture(arch, zeros(grid.Nx * grid.Ny)) # linearized RHS for matrix operations storage = deepcopy(right_hand_side) @@ -112,11 +112,11 @@ function compute_matrix_coefficients(vertically_integrated_areas, grid, gravitat Nx, Ny = grid.Nx, grid.Ny - C = arch_array(arch, zeros(eltype(grid), Nx, Ny, 1)) - diag = arch_array(arch, zeros(eltype(grid), Nx, Ny, 1)) - Ax = arch_array(arch, zeros(eltype(grid), Nx, Ny, 1)) - Ay = arch_array(arch, zeros(eltype(grid), Nx, Ny, 1)) - Az = arch_array(arch, zeros(eltype(grid), Nx, Ny, 1)) + C = on_architecture(arch, zeros(eltype(grid), Nx, Ny, 1)) + diag = on_architecture(arch, zeros(eltype(grid), Nx, Ny, 1)) + Ax = on_architecture(arch, zeros(eltype(grid), Nx, Ny, 1)) + Ay = on_architecture(arch, zeros(eltype(grid), Nx, Ny, 1)) + Az = on_architecture(arch, zeros(eltype(grid), Nx, Ny, 1)) ∫Ax = vertically_integrated_areas.xᶠᶜᶜ ∫Ay = vertically_integrated_areas.yᶜᶠᶜ diff --git a/src/MultiRegion/multi_region_boundary_conditions.jl b/src/MultiRegion/multi_region_boundary_conditions.jl index 0c3e9456ba..1910c8847e 100644 --- a/src/MultiRegion/multi_region_boundary_conditions.jl +++ b/src/MultiRegion/multi_region_boundary_conditions.jl @@ -1,5 +1,5 @@ using Oceananigans: instantiated_location -using Oceananigans.Architectures: arch_array, device_copy_to! +using Oceananigans.Architectures: on_architecture, device_copy_to! using Oceananigans.Operators: assumed_field_location using Oceananigans.Fields: reduced_dimensions using Oceananigans.DistributedComputations: communication_side diff --git a/src/MultiRegion/multi_region_grid.jl b/src/MultiRegion/multi_region_grid.jl index d316f55729..707de5bd3c 100644 --- a/src/MultiRegion/multi_region_grid.jl +++ b/src/MultiRegion/multi_region_grid.jl @@ -197,13 +197,13 @@ Adapt an array `a` to be compatible with a `MultiRegionGrid`. function multi_region_object_from_array(a::AbstractArray, mrg::MultiRegionGrid) local_size = construct_regionally(size, mrg) arch = architecture(mrg) - a = arch_array(CPU(), a) + a = on_architecture(CPU(), a) ma = construct_regionally(partition_global_array, a, mrg.partition, local_size, Iterate(1:length(mrg)), arch) return ma end # Fallback! -multi_region_object_from_array(a::AbstractArray, grid) = arch_array(architecture(grid), a) +multi_region_object_from_array(a::AbstractArray, grid) = on_architecture(architecture(grid), a) #### #### Utilities for MultiRegionGrid diff --git a/src/MultiRegion/x_partitions.jl b/src/MultiRegion/x_partitions.jl index e5b056c539..92fb3cf930 100644 --- a/src/MultiRegion/x_partitions.jl +++ b/src/MultiRegion/x_partitions.jl @@ -59,13 +59,13 @@ partition_global_array(a::Field, p::EqualXPartition, args...) = partition_global function partition_global_array(a::AbstractArray, ::EqualXPartition, local_size, region, arch) idxs = default_indices(length(size(a))) - return arch_array(arch, a[local_size[1]*(region-1)+1:local_size[1]*region, idxs[2:end]...]) + return on_architecture(arch, a[local_size[1]*(region-1)+1:local_size[1]*region, idxs[2:end]...]) end function partition_global_array(a::OffsetArray, ::EqualXPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (a.offsets[1], Tuple(0 for i in 1:length(idxs)-1)...) - return arch_array(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) + return on_architecture(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) end ##### @@ -114,10 +114,10 @@ function reconstruct_global_array(ma::ArrayMRO{T, N}, p::EqualXPartition, arch) for r = 1:length(p) init = Int(n * (r - 1) + 1) fin = Int(n * r) - arr_out[init:fin, idxs[2:end]...] .= arch_array(CPU(), ma[r])[1:fin-init+1, idxs[2:end]...] + arr_out[init:fin, idxs[2:end]...] .= on_architecture(CPU(), ma[r])[1:fin-init+1, idxs[2:end]...] end - return arch_array(arch, arr_out) + return on_architecture(arch, arr_out) end function compact_data!(global_field, global_grid, data::MultiRegionObject, p::EqualXPartition) diff --git a/src/MultiRegion/y_partitions.jl b/src/MultiRegion/y_partitions.jl index d854de3e73..06374a5a58 100644 --- a/src/MultiRegion/y_partitions.jl +++ b/src/MultiRegion/y_partitions.jl @@ -55,13 +55,13 @@ partition_global_array(a::Field, p::EqualYPartition, args...) = partition_global function partition_global_array(a::AbstractArray, ::EqualYPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (a.offsets[1], Tuple(0 for i in 1:length(idxs)-1)...) - return arch_array(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) + return on_architecture(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) end function partition_global_array(a::OffsetArray, ::EqualYPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (0, a.offsets[2], Tuple(0 for i in 1:length(idxs)-2)...) - return arch_array(arch, OffsetArray(a[idxs[1], local_size[2]*(region-1)+1+offsets[2]:local_size[2]*region-offsets[2], idxs[3:end]...], offsets...)) + return on_architecture(arch, OffsetArray(a[idxs[1], local_size[2]*(region-1)+1+offsets[2]:local_size[2]*region-offsets[2], idxs[3:end]...], offsets...)) end #### @@ -104,10 +104,10 @@ function reconstruct_global_array(ma::ArrayMRO{T, N}, p::EqualYPartition, arch) for r = 1:length(p) init = Int(n * (r - 1) + 1) fin = Int(n * r) - arr_out[idxs[1], init:fin, idxs[3:end]...] .= arch_array(CPU(), ma[r])[idxs[1], 1:fin-init+1, idxs[3:end]...] + arr_out[idxs[1], init:fin, idxs[3:end]...] .= on_architecture(CPU(), ma[r])[idxs[1], 1:fin-init+1, idxs[3:end]...] end - return arch_array(arch, arr_out) + return on_architecture(arch, arr_out) end function compact_data!(global_field, global_grid, data::MultiRegionObject, p::EqualYPartition) diff --git a/src/OutputReaders/field_time_series.jl b/src/OutputReaders/field_time_series.jl index 82aa537406..46dab7332d 100644 --- a/src/OutputReaders/field_time_series.jl +++ b/src/OutputReaders/field_time_series.jl @@ -247,7 +247,7 @@ mutable struct FieldTimeSeries{LX, LY, LZ, TI, K, I, D, G, ET, B, χ, P, N} <: A times = time_range end - times = arch_array(architecture(grid), times) + times = on_architecture(architecture(grid), times) end if time_indexing isa Cyclical{Nothing} # we have to infer the period @@ -551,7 +551,7 @@ function Field(location, path::String, name::String, iter; # Change grid to specified architecture? grid = on_architecture(architecture, grid) - raw_data = arch_array(architecture, raw_data) + raw_data = on_architecture(architecture, raw_data) data = offset_data(raw_data, grid, location, indices) return Field(location, grid; boundary_conditions, indices, data) diff --git a/src/OutputReaders/field_time_series_indexing.jl b/src/OutputReaders/field_time_series_indexing.jl index c50037f49b..3dcc20a29f 100644 --- a/src/OutputReaders/field_time_series_indexing.jl +++ b/src/OutputReaders/field_time_series_indexing.jl @@ -78,7 +78,7 @@ function getindex(fts::OnDiskFTS, n::Int) arch = architecture(fts) file = jldopen(fts.path) iter = keys(file["timeseries/t"])[n] - raw_data = arch_array(arch, file["timeseries/$(fts.name)/$iter"]) + raw_data = on_architecture(arch, file["timeseries/$(fts.name)/$iter"]) close(file) # Wrap Field diff --git a/src/Solvers/Solvers.jl b/src/Solvers/Solvers.jl index 9dac3b9e93..c5a5bf13ce 100644 --- a/src/Solvers/Solvers.jl +++ b/src/Solvers/Solvers.jl @@ -13,7 +13,7 @@ using CUDA using SparseArrays using KernelAbstractions -using Oceananigans.Architectures: device, CPU, GPU, array_type, arch_array +using Oceananigans.Architectures: device, CPU, GPU, array_type, on_architecture using Oceananigans.Utils using Oceananigans.Grids using Oceananigans.BoundaryConditions diff --git a/src/Solvers/batched_tridiagonal_solver.jl b/src/Solvers/batched_tridiagonal_solver.jl index ec5872a846..6316fc15d2 100644 --- a/src/Solvers/batched_tridiagonal_solver.jl +++ b/src/Solvers/batched_tridiagonal_solver.jl @@ -1,4 +1,4 @@ -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.Grids: XDirection, YDirection, ZDirection import Oceananigans.Architectures: architecture @@ -26,7 +26,7 @@ architecture(solver::BatchedTridiagonalSolver) = architecture(solver.grid) lower_diagonal, diagonal, upper_diagonal, - scratch = arch_array(architecture(grid), zeros(eltype(grid), size(grid)...)), + scratch = on_architecture(architecture(grid), zeros(eltype(grid), size(grid)...)), tridiagonal_direction = ZDirection() parameters = nothing) @@ -66,7 +66,7 @@ function BatchedTridiagonalSolver(grid; lower_diagonal, diagonal, upper_diagonal, - scratch = arch_array(architecture(grid), zeros(eltype(grid), grid.Nx, grid.Ny, grid.Nz)), + scratch = on_architecture(architecture(grid), zeros(eltype(grid), grid.Nx, grid.Ny, grid.Nz)), parameters = nothing, tridiagonal_direction = ZDirection()) diff --git a/src/Solvers/discrete_transforms.jl b/src/Solvers/discrete_transforms.jl index 744ea60d3d..9cf20a3c88 100644 --- a/src/Solvers/discrete_transforms.jl +++ b/src/Solvers/discrete_transforms.jl @@ -66,8 +66,8 @@ function twiddle_factors(arch::GPU, grid, dims) ω_4N⁻[1] *= 1/2 twiddle_factors = ( - forward = arch_array(arch, ω_4N⁺), - backward = arch_array(arch, ω_4N⁻) + forward = on_architecture(arch, ω_4N⁺), + backward = on_architecture(arch, ω_4N⁻) ) return twiddle_factors diff --git a/src/Solvers/fft_based_poisson_solver.jl b/src/Solvers/fft_based_poisson_solver.jl index 15f54704f5..22c1eddce0 100644 --- a/src/Solvers/fft_based_poisson_solver.jl +++ b/src/Solvers/fft_based_poisson_solver.jl @@ -56,11 +56,11 @@ function FFTBasedPoissonSolver(grid, planner_flag=FFTW.PATIENT) arch = architecture(grid) - eigenvalues = (λx = arch_array(arch, λx), - λy = arch_array(arch, λy), - λz = arch_array(arch, λz)) + eigenvalues = (λx = on_architecture(arch, λx), + λy = on_architecture(arch, λy), + λz = on_architecture(arch, λz)) - storage = arch_array(arch, zeros(complex(eltype(grid)), size(grid)...)) + storage = on_architecture(arch, zeros(complex(eltype(grid)), size(grid)...)) transforms = plan_transforms(grid, storage, planner_flag) diff --git a/src/Solvers/fourier_tridiagonal_poisson_solver.jl b/src/Solvers/fourier_tridiagonal_poisson_solver.jl index 2f93f2f51a..1971ab8d52 100644 --- a/src/Solvers/fourier_tridiagonal_poisson_solver.jl +++ b/src/Solvers/fourier_tridiagonal_poisson_solver.jl @@ -75,20 +75,20 @@ function FourierTridiagonalPoissonSolver(grid, planner_flag=FFTW.PATIENT) λ2 = poisson_eigenvalues(regular_siz2, regular_ext2, 2, regular_top2()) arch = architecture(grid) - λ1 = arch_array(arch, λ1) - λ2 = arch_array(arch, λ2) + λ1 = on_architecture(arch, λ1) + λ2 = on_architecture(arch, λ2) # Plan required transforms for x and y - sol_storage = arch_array(arch, zeros(complex(eltype(grid)), size(grid)...)) + sol_storage = on_architecture(arch, zeros(complex(eltype(grid)), size(grid)...)) transforms = plan_transforms(grid, sol_storage, planner_flag) # Lower and upper diagonals are the same lower_diagonal = CUDA.@allowscalar [ 1 / Δξᶠ(q, grid) for q in 2:size(grid, irreg_dim) ] - lower_diagonal = arch_array(arch, lower_diagonal) + lower_diagonal = on_architecture(arch, lower_diagonal) upper_diagonal = lower_diagonal # Compute diagonal coefficients for each grid point - diagonal = arch_array(arch, zeros(size(grid)...)) + diagonal = on_architecture(arch, zeros(size(grid)...)) launch_config = if grid isa YZRegularRG :yz elseif grid isa XZRegularRG @@ -108,7 +108,7 @@ function FourierTridiagonalPoissonSolver(grid, planner_flag=FFTW.PATIENT) buffer = buffer_needed ? similar(sol_storage) : nothing # Storage space for right hand side of Poisson equation - rhs = arch_array(arch, zeros(complex(eltype(grid)), size(grid)...)) + rhs = on_architecture(arch, zeros(complex(eltype(grid)), size(grid)...)) return FourierTridiagonalPoissonSolver(grid, btsolver, rhs, sol_storage, buffer, transforms) end diff --git a/src/Solvers/heptadiagonal_iterative_solver.jl b/src/Solvers/heptadiagonal_iterative_solver.jl index e192acba13..d3fb92f86a 100644 --- a/src/Solvers/heptadiagonal_iterative_solver.jl +++ b/src/Solvers/heptadiagonal_iterative_solver.jl @@ -1,5 +1,5 @@ using Oceananigans.Architectures -using Oceananigans.Architectures: architecture, arch_array, unsafe_free! +using Oceananigans.Architectures: architecture, on_architecture, unsafe_free! using Oceananigans.Grids: interior_parent_indices, topology using Oceananigans.Utils: heuristic_workgroup using KernelAbstractions: @kernel, @index @@ -36,7 +36,7 @@ end placeholder_timestep = -1.0, preconditioner_method = :Default, preconditioner_settings = nothing, - template = arch_array(architecture(grid), zeros(prod(size(grid)))), + template = on_architecture(architecture(grid), zeros(prod(size(grid)))), verbose = false) Return a `HeptadiagonalIterativeSolver` to solve the problem `A * x = b`, provided @@ -90,7 +90,7 @@ function HeptadiagonalIterativeSolver(coeffs; placeholder_timestep = -1.0, preconditioner_method = :Default, preconditioner_settings = nothing, - template = arch_array(architecture(grid), zeros(prod(size(grid)))), + template = on_architecture(architecture(grid), zeros(prod(size(grid)))), verbose = false) arch = architecture(grid) @@ -138,11 +138,11 @@ Return the sparse matrix constructors based on the pentadiagonal coeffients (`co function matrix_from_coefficients(arch, grid, coeffs, reduced_dim) Ax, Ay, Az, C, D = coeffs - Ax = arch_array(CPU(), Ax) - Ay = arch_array(CPU(), Ay) - Az = arch_array(CPU(), Az) - C = arch_array(CPU(), C) - D = arch_array(arch, D) + Ax = on_architecture(CPU(), Ax) + Ay = on_architecture(CPU(), Ay) + Az = on_architecture(CPU(), Az) + C = on_architecture(CPU(), C) + D = on_architecture(arch, D) N = size(grid) @@ -151,7 +151,7 @@ function matrix_from_coefficients(arch, grid, coeffs, reduced_dim) dims = validate_laplacian_direction.(N, topo, reduced_dim) Nx, Ny, Nz = N = validate_laplacian_size.(N, dims) M = prod(N) - diag = arch_array(arch, zeros(eltype(grid), M)) + diag = on_architecture(arch, zeros(eltype(grid), M)) # the following coefficients are the diagonals of the sparse matrix: # - coeff_d is the main diagonal (coefficents of ηᵢⱼₖ) diff --git a/src/Solvers/sparse_preconditioners.jl b/src/Solvers/sparse_preconditioners.jl index 8706469878..e1415dedae 100644 --- a/src/Solvers/sparse_preconditioners.jl +++ b/src/Solvers/sparse_preconditioners.jl @@ -131,13 +131,13 @@ function asymptotic_diagonal_inverse_preconditioner(A::AbstractMatrix; asymptoti M = size(A, 1) - invdiag = arch_array(arch, zeros(eltype(nzval), M)) + invdiag = on_architecture(arch, zeros(eltype(nzval), M)) loop! = _get_inv_diag!(dev, 256, M) loop!(invdiag, colptr, rowval, nzval) if asymptotic_order == 0 - Minv_cpu = spdiagm(0=>arch_array(CPU(), invdiag)) + Minv_cpu = spdiagm(0=>on_architecture(CPU(), invdiag)) Minv = arch_sparse_matrix(arch, Minv_cpu) elseif asymptotic_order == 1 loop! = _initialize_asymptotic_diagonal_inverse_preconditioner_first_order!(dev, 256, M) @@ -147,7 +147,7 @@ function asymptotic_diagonal_inverse_preconditioner(A::AbstractMatrix; asymptoti Minv = arch_sparse_matrix(arch, constructors(arch, M, M, constr_new)) else D = spdiagm(0=>diag(arch_sparse_matrix(CPU(), A))) - D⁻¹ = spdiagm(0=>arch_array(CPU(), invdiag)) + D⁻¹ = spdiagm(0=>on_architecture(CPU(), invdiag)) Minv_cpu = D⁻¹ * (I - (A - D) * D⁻¹ + (A - D) * D⁻¹ * (A - D) * D⁻¹) Minv = arch_sparse_matrix(arch, Minv_cpu) end diff --git a/src/TurbulenceClosures/implicit_explicit_time_discretization.jl b/src/TurbulenceClosures/implicit_explicit_time_discretization.jl index facfbfc562..33fe206185 100644 --- a/src/TurbulenceClosures/implicit_explicit_time_discretization.jl +++ b/src/TurbulenceClosures/implicit_explicit_time_discretization.jl @@ -1,4 +1,4 @@ -using Oceananigans.Utils: arch_array +using Oceananigans.Utils: on_architecture using Oceananigans.Grids: AbstractGrid abstract type AbstractTimeDiscretization end diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/convective_adjustment_vertical_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/convective_adjustment_vertical_diffusivity.jl index f8da912926..a973edc0a0 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/convective_adjustment_vertical_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/convective_adjustment_vertical_diffusivity.jl @@ -1,4 +1,4 @@ -using Oceananigans.Architectures: architecture, arch_array +using Oceananigans.Architectures: architecture, on_architecture using Oceananigans.AbstractOperations: KernelFunctionOperation using Oceananigans.BuoyancyModels: ∂z_b using Oceananigans.Operators: ℑzᵃᵃᶜ diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/isopycnal_skew_symmetric_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/isopycnal_skew_symmetric_diffusivity.jl index ed63c418c3..e79a1a76d1 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/isopycnal_skew_symmetric_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/isopycnal_skew_symmetric_diffusivity.jl @@ -68,7 +68,7 @@ function with_tracers(tracers, closure_vector::ISSDVector) Ex = length(closure_vector) closure_vector = [with_tracers(tracers, closure_vector[i]) for i=1:Ex] - return arch_array(arch, closure_vector) + return on_architecture(arch, closure_vector) end # Note: computing diffusivities at cell centers for now. diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl index db17a26370..21f44f3000 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/ri_based_vertical_diffusivity.jl @@ -1,4 +1,4 @@ -using Oceananigans.Architectures: architecture, arch_array +using Oceananigans.Architectures: architecture, on_architecture using Oceananigans.BuoyancyModels: ∂z_b using Oceananigans.Operators using Oceananigans.Grids: inactive_node diff --git a/test/test_abstract_operations.jl b/test/test_abstract_operations.jl index 36e90d8f07..3e9c7e6cab 100644 --- a/test/test_abstract_operations.jl +++ b/test/test_abstract_operations.jl @@ -23,7 +23,7 @@ function x_derivative(a) dx_a = ∂x(a) arch = architecture(a) - one_two_three = arch_array(arch, [1, 2, 3]) + one_two_three = on_architecture(arch, [1, 2, 3]) for k in 1:3 interior(a)[:, 1, k] .= one_two_three @@ -38,7 +38,7 @@ function y_derivative(a) dy_a = ∂y(a) arch = architecture(a) - one_three_five = arch_array(arch, [1, 3, 5]) + one_three_five = on_architecture(arch, [1, 3, 5]) for k in 1:3 interior(a)[1, :, k] .= one_three_five @@ -53,7 +53,7 @@ function z_derivative(a) dz_a = ∂z(a) arch = architecture(a) - one_four_seven = arch_array(arch, [1, 4, 7]) + one_four_seven = on_architecture(arch, [1, 4, 7]) for k in 1:3 interior(a)[1, k, :] .= one_four_seven @@ -69,7 +69,7 @@ function x_derivative_cell(arch) a = Field{Center, Center, Center}(grid) dx_a = ∂x(a) - one_four_four = arch_array(arch, [1, 4, 4]) + one_four_four = on_architecture(arch, [1, 4, 4]) for k in 1:3 interior(a)[:, 1, k] .= one_four_four diff --git a/test/test_broadcasting.jl b/test/test_broadcasting.jl index 5e9b5aa639..6f2e8a6788 100644 --- a/test/test_broadcasting.jl +++ b/test/test_broadcasting.jl @@ -108,7 +108,7 @@ include("dependencies_for_runtests.jl") two_two_two_grid = RectilinearGrid(arch, size=(2, 2, 2), extent=(1, 1, 1)) c = CenterField(two_two_two_grid) - random_column = arch_array(arch, reshape(rand(2), 1, 1, 2)) + random_column = on_architecture(arch, reshape(rand(2), 1, 1, 2)) c .= random_column # broadcast to every horizontal column in c diff --git a/test/test_field.jl b/test/test_field.jl index a470c6265b..2392ccc896 100644 --- a/test/test_field.jl +++ b/test/test_field.jl @@ -34,7 +34,7 @@ function correct_field_value_was_set(grid, FieldType, val::Number) arch = architecture(grid) f = FieldType(grid) set!(f, val) - return all(interior(f) .≈ val * arch_array(arch, ones(size(f)))) + return all(interior(f) .≈ val * on_architecture(arch, ones(size(f)))) end function run_field_reduction_tests(FT, arch) @@ -58,10 +58,10 @@ function run_field_reduction_tests(FT, arch) c_vals = f.(nodes(c, reshape=true)...) # Convert to CuArray if needed. - u_vals = arch_array(arch, u_vals) - v_vals = arch_array(arch, v_vals) - w_vals = arch_array(arch, w_vals) - c_vals = arch_array(arch, c_vals) + u_vals = on_architecture(arch, u_vals) + v_vals = on_architecture(arch, v_vals) + w_vals = on_architecture(arch, w_vals) + c_vals = on_architecture(arch, c_vals) ϕs_vals = (u_vals, v_vals, w_vals, c_vals) @@ -149,11 +149,11 @@ function run_field_interpolation_tests(grid) zs = Array(reshape([-1.3, 1.23, 2.1], (1, 1, 3))) X = [(xs[i], ys[j], zs[k]) for i=1:3, j=1:3, k=1:3] - X = arch_array(arch, X) + X = on_architecture(arch, X) - xs = arch_array(arch, xs) - ys = arch_array(arch, ys) - zs = arch_array(arch, zs) + xs = on_architecture(arch, xs) + ys = on_architecture(arch, ys) + zs = on_architecture(arch, zs) CUDA.@allowscalar begin for f in (u, v, w, c) diff --git a/test/test_field_reductions.jl b/test/test_field_reductions.jl index 3319cdb9d4..f44712ab68 100644 --- a/test/test_field_reductions.jl +++ b/test/test_field_reductions.jl @@ -1,7 +1,7 @@ include("dependencies_for_runtests.jl") using Statistics -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.AbstractOperations: BinaryOperation using Oceananigans.Fields: ReducedField, CenterField, ZFaceField, compute_at!, @compute using Oceananigans.BoundaryConditions: fill_halo_regions! diff --git a/test/test_hydrostatic_free_surface_immersed_boundaries.jl b/test/test_hydrostatic_free_surface_immersed_boundaries.jl index 76953dddf0..d262813b66 100644 --- a/test/test_hydrostatic_free_surface_immersed_boundaries.jl +++ b/test/test_hydrostatic_free_surface_immersed_boundaries.jl @@ -65,7 +65,7 @@ using Oceananigans.TurbulenceClosures bathymetry = zeros(Nx, Ny) .- 4000 view(bathymetry, 31:34, 43:47) .= 0 - bathymetry = arch_array(arch, bathymetry) + bathymetry = on_architecture(arch, bathymetry) grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bathymetry)) diff --git a/test/test_hydrostatic_free_surface_immersed_boundaries_implicit_solve.jl b/test/test_hydrostatic_free_surface_immersed_boundaries_implicit_solve.jl index 06e0021986..9cf8116bbb 100644 --- a/test/test_hydrostatic_free_surface_immersed_boundaries_implicit_solve.jl +++ b/test/test_hydrostatic_free_surface_immersed_boundaries_implicit_solve.jl @@ -1,7 +1,7 @@ include("dependencies_for_runtests.jl") using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid, GridFittedBottom -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.TurbulenceClosures using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_vertically_integrated_volume_flux!, compute_implicit_free_surface_right_hand_side!, @@ -31,7 +31,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_vertically_integ bottom = [-1. for j=1:Ny, i=1:Nx] bottom[imm1-1:imp1+1, jmm1-1:jmp1+1] .= 0 - B = arch_array(arch, bottom) + B = on_architecture(arch, bottom) grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(B)) free_surfaces = [ImplicitFreeSurface(solver_method=:HeptadiagonalIterativeSolver, gravitational_acceleration=1.0), diff --git a/test/test_lagrangian_particle_tracking.jl b/test/test_lagrangian_particle_tracking.jl index 9af8ba9f31..1e6c9b6ac4 100644 --- a/test/test_lagrangian_particle_tracking.jl +++ b/test/test_lagrangian_particle_tracking.jl @@ -2,7 +2,7 @@ include("dependencies_for_runtests.jl") using NCDatasets using StructArrays -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture struct TestParticle{T} x :: T @@ -54,9 +54,9 @@ function run_simple_particle_tracking_tests(arch, timestepper; vertically_stretc ##### Test default particle ##### - xs = arch_array(arch, 0.6*ones(P)) - ys = arch_array(arch, 0.58*ones(P)) - zs = arch_array(arch, 0.8*ones(P)) + xs = on_architecture(arch, 0.6*ones(P)) + ys = on_architecture(arch, 0.58*ones(P)) + zs = on_architecture(arch, 0.8*ones(P)) particles = LagrangianParticles(x=xs, y=ys, z=zs) @test particles isa LagrangianParticles @@ -80,7 +80,7 @@ function run_simple_particle_tracking_tests(arch, timestepper; vertically_stretc initial_z = CUDA.@allowscalar grid.zᵃᵃᶜ[grid.Nz - 1] top_boundary = CUDA.@allowscalar grid.zᵃᵃᶠ[grid.Nz + 1] - x, y, z = arch_array.(Ref(arch), ([0.0], [0.0], [initial_z])) + x, y, z = on_architecture.(Ref(arch), ([0.0], [0.0], [initial_z])) particles = LagrangianParticles(; x, y, z) u, v, w = VelocityFields(grid) @@ -102,13 +102,13 @@ function run_simple_particle_tracking_tests(arch, timestepper; vertically_stretc ##### Test custom particle "SpeedTrackingParticle" ##### - xs = arch_array(arch, zeros(P)) - ys = arch_array(arch, zeros(P)) - zs = arch_array(arch, 0.5 * ones(P)) - us = arch_array(arch, zeros(P)) - vs = arch_array(arch, zeros(P)) - ws = arch_array(arch, zeros(P)) - ss = arch_array(arch, zeros(P)) + xs = on_architecture(arch, zeros(P)) + ys = on_architecture(arch, zeros(P)) + zs = on_architecture(arch, 0.5 * ones(P)) + us = on_architecture(arch, zeros(P)) + vs = on_architecture(arch, zeros(P)) + ws = on_architecture(arch, zeros(P)) + ss = on_architecture(arch, zeros(P)) # Test custom constructor particles = StructArray{TestParticle}((xs, ys, zs, us, vs, ws, ss)) diff --git a/test/test_matrix_poisson_solver.jl b/test/test_matrix_poisson_solver.jl index 79cbe01d7f..ad878cf24b 100644 --- a/test/test_matrix_poisson_solver.jl +++ b/test/test_matrix_poisson_solver.jl @@ -1,6 +1,6 @@ using Oceananigans.Solvers: solve!, HeptadiagonalIterativeSolver, sparse_approximate_inverse using Oceananigans.Operators: volume, Δyᶠᶜᵃ, Δyᶜᶠᵃ, Δyᶜᶜᵃ, Δxᶠᶜᵃ, Δxᶜᶠᵃ, Δxᶜᶜᵃ, Δyᵃᶜᵃ, Δxᶜᵃᵃ, Δzᵃᵃᶠ, Δzᵃᵃᶜ, ∇²ᶜᶜᶜ -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.Grids: architecture using KernelAbstractions: @kernel, @index using Statistics, LinearAlgebra, SparseArrays @@ -21,10 +21,10 @@ function run_identity_operator_test(grid) solver = HeptadiagonalIterativeSolver((A, A, A, C, D), grid = grid) - b = arch_array(architecture(grid), rand(M)) + b = on_architecture(architecture(grid), rand(M)) arch = architecture(grid) - storage = arch_array(arch, zeros(size(b))) + storage = on_architecture(arch, zeros(size(b))) solve!(storage, solver, b, 1.0) @test norm(Array(storage) .- Array(b)) .< solver.tolerance @@ -44,11 +44,11 @@ end function compute_poisson_weights(grid) N = size(grid) - Ax = arch_array(architecture(grid), zeros(N...)) - Ay = arch_array(architecture(grid), zeros(N...)) - Az = arch_array(architecture(grid), zeros(N...)) - C = arch_array(architecture(grid), zeros(grid, N...)) - D = arch_array(architecture(grid), zeros(grid, N...)) + Ax = on_architecture(architecture(grid), zeros(N...)) + Ay = on_architecture(architecture(grid), zeros(N...)) + Az = on_architecture(architecture(grid), zeros(N...)) + C = on_architecture(architecture(grid), zeros(grid, N...)) + D = on_architecture(architecture(grid), zeros(grid, N...)) launch!(architecture(grid), grid, :xyz, _compute_poisson_weights, Ax, Ay, Az, grid) @@ -85,7 +85,7 @@ function run_poisson_equation_test(grid) ϕ_solution = CenterField(grid) arch = architecture(grid) - storage = arch_array(arch, zeros(size(rhs))) + storage = on_architecture(arch, zeros(size(rhs))) solve!(storage, solver, rhs, 1.0) set!(ϕ_solution, reshape(storage, solver.problem_size...)) fill_halo_regions!(ϕ_solution) diff --git a/test/test_multi_region_poisson_solver.jl b/test/test_multi_region_poisson_solver.jl index 04d8bf065d..1d0716a11f 100644 --- a/test/test_multi_region_poisson_solver.jl +++ b/test/test_multi_region_poisson_solver.jl @@ -2,7 +2,7 @@ include("dependencies_for_runtests.jl") using Oceananigans.Solvers: solve!, HeptadiagonalIterativeSolver, sparse_approximate_inverse using Oceananigans.Operators: volume, Δyᶠᶜᵃ, Δyᶜᶠᵃ, Δyᶜᶜᵃ, Δxᶠᶜᵃ, Δxᶜᶠᵃ, Δxᶜᶜᵃ, Δyᵃᶜᵃ, Δxᶜᵃᵃ, Δzᵃᵃᶠ, Δzᵃᵃᶜ, ∇²ᶜᶜᶜ -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using KernelAbstractions: @kernel, @index using Statistics, LinearAlgebra, SparseArrays @@ -55,7 +55,7 @@ function compute_poisson_weights(grid) Ay = zeros(N...) Az = zeros(N...) C = zeros(grid, N...) - D = arch_array(grid.architecture, ones(N...)) + D = on_architecture(grid.architecture, ones(N...)) for k = 1:grid.Nz, j = 1:grid.Ny, i = 1:grid.Nx Ax[i, j, k] = Δzᵃᵃᶜ(i, j, k, grid) * Δyᶠᶜᵃ(i, j, k, grid) / Δxᶠᶜᵃ(i, j, k, grid) diff --git a/test/test_sewater_density.jl b/test/test_sewater_density.jl index adf448f6a7..8a67ad05be 100644 --- a/test/test_sewater_density.jl +++ b/test/test_sewater_density.jl @@ -22,7 +22,7 @@ function grid_size_value(arch, grid, value) value_array = fill(value, size(grid)) - return arch_array(arch, value_array) + return on_architecture(arch, value_array) end diff --git a/test/test_shallow_water_models.jl b/test/test_shallow_water_models.jl index c23fd79af6..2e70999669 100644 --- a/test/test_shallow_water_models.jl +++ b/test/test_shallow_water_models.jl @@ -65,7 +65,7 @@ function test_shallow_water_diffusion_cosine(grid, formulation, fieldname, ξ) field = model.velocities[fieldname] - interior(field) .= arch_array(architecture(grid), cos.(m * ξ)) + interior(field) .= on_architecture(architecture(grid), cos.(m * ξ)) update_state!(model) # Step forward with small time-step relative to diff. time-scale diff --git a/validation/barotropic_gyre/barotropic_gyre.jl b/validation/barotropic_gyre/barotropic_gyre.jl index 2d74476713..db641b63c5 100644 --- a/validation/barotropic_gyre/barotropic_gyre.jl +++ b/validation/barotropic_gyre/barotropic_gyre.jl @@ -34,7 +34,7 @@ underlying_grid = LatitudeLongitudeGrid(size = (Nx, Ny, 1), ## bathymetry = zeros(Nx, Ny) .- 4000 ## view(bathymetry, 31:34, 43:47) .= 0 -## bathymetry = arch_array(arch, bathymetry) +## bathymetry = on_architecture(arch, bathymetry) ## grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bathymetry) ) grid = underlying_grid diff --git a/validation/multi_region/multi_region_near_global_quarter_degree.jl b/validation/multi_region/multi_region_near_global_quarter_degree.jl index d5cb752b33..067b6f4c70 100644 --- a/validation/multi_region/multi_region_near_global_quarter_degree.jl +++ b/validation/multi_region/multi_region_near_global_quarter_degree.jl @@ -7,7 +7,7 @@ using Oceananigans.Units using Oceananigans.MultiRegion using Oceananigans.MultiRegion: multi_region_object_from_array using Oceananigans.Fields: interpolate, Field -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.Coriolis: HydrostaticSphericalCoriolis using Oceananigans.BoundaryConditions using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid, GridFittedBottom, inactive_node, peripheral_node @@ -94,7 +94,7 @@ T★ = file_temp["field"] S★ = file_salt["field"] # Remember the convention!! On the surface a negative flux increases a positive decreases -bathymetry = arch_array(arch, bathymetry) +bathymetry = on_architecture(arch, bathymetry) # Stretched faces taken from ECCO Version 4 (50 levels in the vertical) z_faces = file_z_faces["z_faces"][3:end] diff --git a/validation/shallow_water_model/near_global_shallow_water_quarter_degree.jl b/validation/shallow_water_model/near_global_shallow_water_quarter_degree.jl index 0cd7ccd929..00ed065c13 100644 --- a/validation/shallow_water_model/near_global_shallow_water_quarter_degree.jl +++ b/validation/shallow_water_model/near_global_shallow_water_quarter_degree.jl @@ -7,7 +7,7 @@ using Oceananigans.Units using Oceananigans.MultiRegion using Oceananigans.MultiRegion: multi_region_object_from_array using Oceananigans.Fields: interpolate, Field -using Oceananigans.Architectures: arch_array +using Oceananigans.Architectures: on_architecture using Oceananigans.Coriolis: HydrostaticSphericalCoriolis using Oceananigans.BoundaryConditions using Oceananigans.Grids: boundary_node, inactive_node, peripheral_node @@ -84,8 +84,8 @@ end # Files contain 1 year (1992) of 12 monthly averages τˣ = file_tau_x["field"] ./ reference_density τʸ = file_tau_y["field"] ./ reference_density -τˣ = arch_array(arch, τˣ) -τʸ = arch_array(arch, τʸ) +τˣ = on_architecture(arch, τˣ) +τʸ = on_architecture(arch, τʸ) bat = file_bathymetry["bathymetry"] boundary = Int.(bat .> 0) From 5ae1a3fdbee68113e3ed23a8d5202528312b4919 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Mar 2024 09:51:36 -0500 Subject: [PATCH 02/30] couple of more additions --- src/Architectures.jl | 9 +++++++++ src/ImmersedBoundaries/partial_cell_bottom.jl | 3 +++ .../explicit_free_surface.jl | 4 ++++ .../implicit_free_surface.jl | 8 ++++++++ .../prescribed_hydrostatic_velocity_fields.jl | 8 +++++++- .../split_explicit_free_surface.jl | 7 +++++++ .../shallow_water_diffusion_operators.jl | 4 ++++ src/OutputReaders/field_time_series.jl | 11 +++++++++++ src/TurbulenceClosures/discrete_diffusion_function.jl | 4 ++++ .../turbulent_kinetic_energy_equation.jl | 5 +++++ .../scalar_biharmonic_diffusivity.jl | 6 ++++++ .../scalar_diffusivity.jl | 6 ++++++ 12 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index aebc19f4bb..600417edb4 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -56,6 +56,15 @@ child_architecture(arch) = arch array_type(::CPU) = Array array_type(::GPU) = CuArray +# Fallback +on_architecture(arch, a) = a + +# Tupled implementation +@inline on_architecture(arch, t::Tuple{}) = () +@inline on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) +@inline on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) + +# On architecture for array types on_architecture(::CPU, a::Array) = a on_architecture(::CPU, a::CuArray) = Array(a) on_architecture(::GPU, a::Array) = CuArray(a) diff --git a/src/ImmersedBoundaries/partial_cell_bottom.jl b/src/ImmersedBoundaries/partial_cell_bottom.jl index 94fff4cbca..f6ba0368e3 100644 --- a/src/ImmersedBoundaries/partial_cell_bottom.jl +++ b/src/ImmersedBoundaries/partial_cell_bottom.jl @@ -81,6 +81,9 @@ end Adapt.adapt_structure(to, ib::PartialCellBottom) = PartialCellBottom(adapt(to, ib.bottom_height.data), ib.minimum_fractional_cell_height) +on_architecture(to, ib::PartialCellBottom) = PartialCellBottom(on_architecture(to, ib.bottom_height.data), + on_architecture(to, ib.minimum_fractional_cell_height)) + """ --x-- diff --git a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl index 672322068f..3d210e6778 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl @@ -24,6 +24,10 @@ ExplicitFreeSurface(; gravitational_acceleration=g_Earth) = Adapt.adapt_structure(to, free_surface::ExplicitFreeSurface) = ExplicitFreeSurface(Adapt.adapt(to, free_surface.η), free_surface.gravitational_acceleration) +on_architecture(to, free_surface::ExplicitFreeSurface) = + ExplicitFreeSurface(on_architecture(to, free_surface.η), + on_architecture(to, free_surface.gravitational_acceleration)) + ##### ##### Interface to HydrostaticFreeSurfaceModel ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl index d835e2a646..d745d4dc0a 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl @@ -81,6 +81,14 @@ Adapt.adapt_structure(to, free_surface::ImplicitFreeSurface) = ImplicitFreeSurface(Adapt.adapt(to, free_surface.η), free_surface.gravitational_acceleration, nothing, nothing, nothing, nothing) +on_architecture(to, free_surface::ImplicitFreeSurface) = + ImplicitFreeSurface(on_architecture(to, free_surface.η), + on_architecture(to, free_surface.gravitational_acceleration), + on_architecture(to, free_surface.barotropic_volume_flux), + on_architecture(to, free_surface.implicit_step_solver), + on_architecture(to, free_surface.solver_methods), + on_architecture(to, free_surface.solver_settings)) + # Internal function for HydrostaticFreeSurfaceModel function FreeSurface(free_surface::ImplicitFreeSurface{Nothing}, velocities, grid) η = FreeSurfaceDisplacementField(velocities, free_surface, grid) diff --git a/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl b/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl index 9905bfc586..6d1dc41de0 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl @@ -111,7 +111,13 @@ Adapt.adapt_structure(to, velocities::PrescribedVelocityFields) = PrescribedVelocityFields(Adapt.adapt(to, velocities.u), Adapt.adapt(to, velocities.v), Adapt.adapt(to, velocities.w), - nothing) + nothing) # Why are parameters not passed here? They probably should... + +on_architecture(to, velocities::PrescribedVelocityFields) = + PrescribedVelocityFields(on_architecture(to, velocities.u), + on_architecture(to, velocities.v), + on_architecture(to, velocities.w), + on_architecture(to, velocities.parameters)) # If the model only tracks particles... do nothing but that!!! const OnlyParticleTrackingModel = HydrostaticFreeSurfaceModel{TS, E, A, S, G, T, V, B, R, F, P, U, C} where diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index d32b5786d8..315238277a 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -364,3 +364,10 @@ end Adapt.adapt_structure(to, free_surface::SplitExplicitFreeSurface) = SplitExplicitFreeSurface(Adapt.adapt(to, free_surface.η), nothing, nothing, free_surface.gravitational_acceleration, nothing) + +on_architecture(to, free_surface::SplitExplicitFreeSurface) = + SplitExplicitFreeSurface(on_architecture(to, free_surface.η), + on_architecture(to, free_surface.state), + on_architecture(to, free_surface.auxiliary), + on_architecture(to, free_surface.gravitational_acceleration), + on_architecture(to, free_surface.settings)) diff --git a/src/Models/ShallowWaterModels/shallow_water_diffusion_operators.jl b/src/Models/ShallowWaterModels/shallow_water_diffusion_operators.jl index c3f2f82729..6e107b4e76 100644 --- a/src/Models/ShallowWaterModels/shallow_water_diffusion_operators.jl +++ b/src/Models/ShallowWaterModels/shallow_water_diffusion_operators.jl @@ -51,6 +51,10 @@ viscosity(closure::ShallowWaterScalarDiffusivity, K) = closure.ν Adapt.adapt_structure(to, closure::ShallowWaterScalarDiffusivity{B}) where B = ShallowWaterScalarDiffusivity{B}(Adapt.adapt(to, closure.ν), Adapt.adapt(to, closure.ξ)) +on_architecture(to, closure::ShallowWaterScalarDiffusivity{B}) where B = + ShallowWaterScalarDiffusivity{B}(on_architecture(to, closure.ν), on_architecture(to, closure.ξ)) + + # The diffusivity for the shallow water model is calculated as h*ν in order to have a viscous term in the form # h⁻¹ ∇ ⋅ (hν t) where t is the 2D stress tensor plus a trace => t = ∇u + (∇u)ᵀ - ξI⋅(∇⋅u) diff --git a/src/OutputReaders/field_time_series.jl b/src/OutputReaders/field_time_series.jl index 46dab7332d..9d015223e7 100644 --- a/src/OutputReaders/field_time_series.jl +++ b/src/OutputReaders/field_time_series.jl @@ -267,6 +267,17 @@ mutable struct FieldTimeSeries{LX, LY, LZ, TI, K, I, D, G, ET, B, χ, P, N} <: A end end +on_architecture(to, fts::FieldTimeSeries{LX, LY, LZ}) where {LX, LY, LZ} = + FieldTimeSeries{LX, LY, LZ}(on_architecture(to, data), + on_architecture(to, grid), + on_architecture(to, backend), + on_architecture(to, bcs), + on_architecture(to, indices), + on_architecture(to, times), + on_architecture(to, path), + on_architecture(to, name), + on_architecture(to, time_indexing)) + ##### ##### Minimal implementation of FieldTimeSeries for use in GPU kernels ##### diff --git a/src/TurbulenceClosures/discrete_diffusion_function.jl b/src/TurbulenceClosures/discrete_diffusion_function.jl index 9e7a87c108..19d8203783 100644 --- a/src/TurbulenceClosures/discrete_diffusion_function.jl +++ b/src/TurbulenceClosures/discrete_diffusion_function.jl @@ -101,3 +101,7 @@ end Adapt.adapt_structure(to, dd::DiscreteDiffusionFunction{LX, LY, LZ}) where {LX, LY, LZ} = DiscreteDiffusionFunction{LX, LY, LZ}(Adapt.adapt(to, dd.func), Adapt.adapt(to, dd.parameters)) + +on_architecture(to, dd::DiscreteDiffusionFunction{LX, LY, LZ}) where {LX, LY, LZ} = + DiscreteDiffusionFunction{LX, LY, LZ}(on_architecture(to, dd.func), + on_architecture(to, dd.parameters)) diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl index 58d82af1af..3acb1763bf 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/CATKEVerticalDiffusivities/turbulent_kinetic_energy_equation.jl @@ -248,6 +248,11 @@ end TKETopBoundaryConditionParameters(adapt(to, p.top_tracer_boundary_conditions), adapt(to, p.top_velocity_boundary_conditions)) +@inline on_architecture(to, p::TKETopBoundaryConditionParameters) = + TKETopBoundaryConditionParameters(on_architecture(to, p.top_tracer_boundary_conditions), + on_architecture(to, p.top_velocity_boundary_conditions)) + + using Oceananigans.BoundaryConditions: Flux const TKEBoundaryFunction = DiscreteBoundaryFunction{<:TKETopBoundaryConditionParameters} const TKEBoundaryCondition = BoundaryCondition{<:Flux, <:TKEBoundaryFunction} diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/scalar_biharmonic_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/scalar_biharmonic_diffusivity.jl index 00c109db85..05d565022e 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/scalar_biharmonic_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/scalar_biharmonic_diffusivity.jl @@ -109,3 +109,9 @@ function Adapt.adapt_structure(to, closure::ScalarBiharmonicDiffusivity{F, <:Any return ScalarBiharmonicDiffusivity{F, N}(ν, κ) end +function on_architecture(to, closure::ScalarBiharmonicDiffusivity{F, <:Any, <:Any, N}) where {F, N} + ν = on_architecture(to, closure.ν) + κ = on_architecture(to, closure.κ) + return ScalarBiharmonicDiffusivity{F, N}(ν, κ) +end + diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/scalar_diffusivity.jl b/src/TurbulenceClosures/turbulence_closure_implementations/scalar_diffusivity.jl index df66ead80b..aaeb2d50e0 100644 --- a/src/TurbulenceClosures/turbulence_closure_implementations/scalar_diffusivity.jl +++ b/src/TurbulenceClosures/turbulence_closure_implementations/scalar_diffusivity.jl @@ -208,3 +208,9 @@ function Adapt.adapt_structure(to, closure::ScalarDiffusivity{TD, F, <:Any, <:An κ = Adapt.adapt(to, closure.κ) return ScalarDiffusivity{TD, F, N}(ν, κ) end + +function on_architecture(to, closure::ScalarDiffusivity{TD, F, <:Any, <:Any, N}) where {TD, F, N} + ν = on_architecture(to, closure.ν) + κ = on_architecture(to, closure.κ) + return ScalarDiffusivity{TD, F, N}(ν, κ) +end From 920ef5ba9aab267fe70bc16a916457d5962b78f8 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Mar 2024 09:55:10 -0500 Subject: [PATCH 03/30] importing on_architecture here and there --- src/Fields/Fields.jl | 2 ++ src/Forcings/Forcings.jl | 1 + src/Forcings/advective_forcing.jl | 2 -- src/Forcings/continuous_forcing.jl | 2 -- src/Grids/Grids.jl | 2 +- .../HydrostaticFreeSurfaceModels.jl | 1 + src/TurbulenceClosures/TurbulenceClosures.jl | 1 + 7 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index b974f00700..cae3a72ee9 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -14,6 +14,8 @@ using Oceananigans.Grids using Oceananigans.BoundaryConditions using Oceananigans.Utils +import Oceananigans.Architectures: on_architecture + include("abstract_field.jl") include("constant_field.jl") include("function_field.jl") diff --git a/src/Forcings/Forcings.jl b/src/Forcings/Forcings.jl index c0d99c2a8c..0df515ba82 100644 --- a/src/Forcings/Forcings.jl +++ b/src/Forcings/Forcings.jl @@ -3,6 +3,7 @@ module Forcings export Forcing, ContinuousForcing, DiscreteForcing, Relaxation, GaussianMask, LinearTarget, AdvectiveForcing using Oceananigans.Fields +import Oceananigans.Architectures: on_architecture include("multiple_forcings.jl") include("continuous_forcing.jl") diff --git a/src/Forcings/advective_forcing.jl b/src/Forcings/advective_forcing.jl index ecf5191c47..1bf2c2083d 100644 --- a/src/Forcings/advective_forcing.jl +++ b/src/Forcings/advective_forcing.jl @@ -3,8 +3,6 @@ using Oceananigans.Fields: ZeroField, ConstantField using Oceananigans.Utils: SumOfArrays using Adapt -import Oceananigans.Architectures: on_architecture - maybe_constant_field(u) = u maybe_constant_field(u::Number) = ConstantField(u) diff --git a/src/Forcings/continuous_forcing.jl b/src/Forcings/continuous_forcing.jl index 6f3283358e..388ed3958d 100644 --- a/src/Forcings/continuous_forcing.jl +++ b/src/Forcings/continuous_forcing.jl @@ -5,8 +5,6 @@ using Oceananigans.Operators: assumed_field_location, index_and_interp_dependenc using Oceananigans.Fields: show_location using Oceananigans.Utils: user_function_arguments, tupleit, prettysummary -import Oceananigans.Architectures: on_architecture - """ ContinuousForcing{LX, LY, LZ, P, F, D, I, ℑ} diff --git a/src/Grids/Grids.jl b/src/Grids/Grids.jl index ae89b82d6f..586ef84f86 100644 --- a/src/Grids/Grids.jl +++ b/src/Grids/Grids.jl @@ -30,7 +30,7 @@ using Oceananigans using Oceananigans.Architectures import Base: size, length, eltype, show, - -import Oceananigans.Architectures: architecture +import Oceananigans.Architectures: architecture, on_architecture # Physical constants for constructors. const R_Earth = 6371.0e3 # [m] Mean radius of the Earth https://en.wikipedia.org/wiki/Earth diff --git a/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl b/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl index f5e1f793b6..5c1531d8a2 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl @@ -17,6 +17,7 @@ using DocStringExtensions import Oceananigans: fields, prognostic_fields, initialize! import Oceananigans.Advection: cell_advection_timescale import Oceananigans.TimeSteppers: step_lagrangian_particles! +import Oceananigans.Architectures: on_architecture abstract type AbstractFreeSurface{E, G} end diff --git a/src/TurbulenceClosures/TurbulenceClosures.jl b/src/TurbulenceClosures/TurbulenceClosures.jl index 60c27b9d3a..f401a8eab3 100644 --- a/src/TurbulenceClosures/TurbulenceClosures.jl +++ b/src/TurbulenceClosures/TurbulenceClosures.jl @@ -51,6 +51,7 @@ using Oceananigans.Utils using Oceananigans.Architectures: AbstractArchitecture, device using Oceananigans.Fields: FunctionField import Oceananigans.Advection: required_halo_size +import Oceananigans.Architectures: on_architecture const VerticallyBoundedGrid{FT} = AbstractGrid{FT, <:Any, <:Any, <:Bounded} From 9f20a5581c3e214c059c171026e211e3cec4f2e4 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Mar 2024 15:22:47 -0500 Subject: [PATCH 04/30] on_architecture for split explicit --- .../split_explicit_free_surface.jl | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl index 315238277a..0f36e03fa3 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface.jl @@ -365,9 +365,19 @@ Adapt.adapt_structure(to, free_surface::SplitExplicitFreeSurface) = SplitExplicitFreeSurface(Adapt.adapt(to, free_surface.η), nothing, nothing, free_surface.gravitational_acceleration, nothing) -on_architecture(to, free_surface::SplitExplicitFreeSurface) = - SplitExplicitFreeSurface(on_architecture(to, free_surface.η), - on_architecture(to, free_surface.state), - on_architecture(to, free_surface.auxiliary), - on_architecture(to, free_surface.gravitational_acceleration), - on_architecture(to, free_surface.settings)) +for Type in (:SplitExplicitFreeSurface, + :SplitExplicitSettings, + :SplitExplicitState, + :SplitExplicitAuxiliaryFields, + :FixedTimeStepSize, + :FixedSubstepNumber) + + @eval begin + function on_architecture(to, settings::$Type) + args = Tuple(on_architecture(to, prop) for prop in propertynames(settings)) + return SplitExplicitState(args...) + end + end +end + + \ No newline at end of file From 6e384db94da25bb600c83a178ffac8b233157618 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Mar 2024 15:25:58 -0500 Subject: [PATCH 05/30] keep deprecated arch_array --- src/Architectures.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Architectures.jl b/src/Architectures.jl index 600417edb4..d5de78e970 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -129,5 +129,12 @@ end @inline convert_args(::GPU, args) = CUDA.cudaconvert(args) @inline convert_args(::GPU, args::Tuple) = map(CUDA.cudaconvert, args) +# Deprecated functions + +function arch_array(arch, arr) + @warn "`arch_array` is deprecated. Use `on_architecture` instead" + return on_architecture(arch, arr) +end + end # module From a596b7d9954ce7bc28258636ff8b98141d1a6246 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Fri, 1 Mar 2024 15:26:23 -0500 Subject: [PATCH 06/30] typo --- src/Architectures.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index d5de78e970..b5fe68fde5 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -132,7 +132,7 @@ end # Deprecated functions function arch_array(arch, arr) - @warn "`arch_array` is deprecated. Use `on_architecture` instead" + @warn "`arch_array` is deprecated. Use `on_architecture` instead." return on_architecture(arch, arr) end From 0689b9e1e93d0b741aeb66ff376b7df7562fbd7c Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Mar 2024 14:58:01 -0500 Subject: [PATCH 07/30] remove useless methods --- src/Architectures.jl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index b5fe68fde5..7c6842eab1 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -56,7 +56,7 @@ child_architecture(arch) = arch array_type(::CPU) = Array array_type(::GPU) = CuArray -# Fallback +# Fallback on_architecture(arch, a) = a # Tupled implementation @@ -79,16 +79,6 @@ on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:CuArray}) = Array(a) on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:Array}) = a -on_architecture(::CPU, a::AbstractRange) = a -on_architecture(::CPU, ::Nothing) = nothing -on_architecture(::CPU, a::Number) = a -on_architecture(::CPU, a::Function) = a - -on_architecture(::GPU, a::AbstractRange) = a -on_architecture(::GPU, ::Nothing) = nothing -on_architecture(::GPU, a::Number) = a -on_architecture(::GPU, a::Function) = a - on_architecture(arch::CPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) on_architecture(arch::GPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) From aa3bf068b49a26af21f00f006884a1556c8262e9 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 3 Mar 2024 14:58:16 -0500 Subject: [PATCH 08/30] no inline for tuples --- src/Architectures.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 7c6842eab1..2282fb136f 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -60,9 +60,9 @@ array_type(::GPU) = CuArray on_architecture(arch, a) = a # Tupled implementation -@inline on_architecture(arch, t::Tuple{}) = () -@inline on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) -@inline on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) +on_architecture(arch, t::Tuple{}) = () +on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) +on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) # On architecture for array types on_architecture(::CPU, a::Array) = a From 44c569184b77150cba23c86ee90a12c3ee21065a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:19:10 -0500 Subject: [PATCH 09/30] AbstractSerialArchitecture to disambiguate --- src/Architectures.jl | 11 +++++++++-- src/Grids/latitude_longitude_grid.jl | 2 +- src/Grids/orthogonal_spherical_shell_grid.jl | 2 +- src/Grids/rectilinear_grid.jl | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 2282fb136f..52e47c2d4c 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -16,20 +16,27 @@ Abstract supertype for architectures supported by Oceananigans. """ abstract type AbstractArchitecture end +""" + AbstractSerialArchitecture + +Abstract supertype for serial architectures supported by Oceananigans. +""" +abstract type AbstractSerialArchitecture <: AbstractArchitecture end + """ CPU <: AbstractArchitecture Run Oceananigans on one CPU node. Uses multiple threads if the environment variable `JULIA_NUM_THREADS` is set. """ -struct CPU <: AbstractArchitecture end +struct CPU <: AbstractSerialArchitecture end """ GPU <: AbstractArchitecture Run Oceananigans on a single NVIDIA CUDA GPU. """ -struct GPU <: AbstractArchitecture end +struct GPU <: AbstractSerialArchitecture end ##### ##### These methods are extended in DistributedComputations.jl diff --git a/src/Grids/latitude_longitude_grid.jl b/src/Grids/latitude_longitude_grid.jl index 3e5fafd730..91e28b1318 100644 --- a/src/Grids/latitude_longitude_grid.jl +++ b/src/Grids/latitude_longitude_grid.jl @@ -346,7 +346,7 @@ function with_halo(new_halo, old_grid::LatitudeLongitudeGrid) return new_grid end -function on_architecture(new_arch::AbstractArchitecture, old_grid::LatitudeLongitudeGrid) +function on_architecture(new_arch::AbstractSerialArchitecture, old_grid::LatitudeLongitudeGrid) old_properties = (old_grid.Δλᶠᵃᵃ, old_grid.Δλᶜᵃᵃ, old_grid.λᶠᵃᵃ, old_grid.λᶜᵃᵃ, old_grid.Δφᵃᶠᵃ, old_grid.Δφᵃᶜᵃ, old_grid.φᵃᶠᵃ, old_grid.φᵃᶜᵃ, old_grid.Δzᵃᵃᶠ, old_grid.Δzᵃᵃᶜ, old_grid.zᵃᵃᶠ, old_grid.zᵃᵃᶜ, diff --git a/src/Grids/orthogonal_spherical_shell_grid.jl b/src/Grids/orthogonal_spherical_shell_grid.jl index b8b3f0a723..b0ec80a6ca 100644 --- a/src/Grids/orthogonal_spherical_shell_grid.jl +++ b/src/Grids/orthogonal_spherical_shell_grid.jl @@ -873,7 +873,7 @@ function conformal_cubed_sphere_panel(filepath::AbstractString, architecture = C conformal_mapping) end -function on_architecture(arch::AbstractArchitecture, grid::OrthogonalSphericalShellGrid) +function on_architecture(arch::AbstractSerialArchitecture, grid::OrthogonalSphericalShellGrid) coordinates = (:λᶜᶜᵃ, :λᶠᶜᵃ, diff --git a/src/Grids/rectilinear_grid.jl b/src/Grids/rectilinear_grid.jl index 99f35afe09..6d22ad81e7 100644 --- a/src/Grids/rectilinear_grid.jl +++ b/src/Grids/rectilinear_grid.jl @@ -394,7 +394,7 @@ function with_halo(new_halo, old_grid::RectilinearGrid) return new_grid end -function on_architecture(new_arch::AbstractArchitecture, old_grid::RectilinearGrid) +function on_architecture(new_arch::AbstractSerialArchitecture, old_grid::RectilinearGrid) old_properties = (old_grid.Δxᶠᵃᵃ, old_grid.Δxᶜᵃᵃ, old_grid.xᶠᵃᵃ, old_grid.xᶜᵃᵃ, old_grid.Δyᵃᶠᵃ, old_grid.Δyᵃᶜᵃ, old_grid.yᵃᶠᵃ, old_grid.yᵃᶜᵃ, old_grid.Δzᵃᵃᶠ, old_grid.Δzᵃᵃᶜ, old_grid.zᵃᵃᶠ, old_grid.zᵃᵃᶜ) From 8b2322af36a2396d24367c30bc464e038cec8887 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:20:21 -0500 Subject: [PATCH 10/30] a comment --- src/DistributedComputations/distributed_architectures.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 7284ae9b66..2aa393c14b 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -261,7 +261,10 @@ const SynchronizedDistributed = Distributed{<:Any, true} child_architecture(arch::Distributed) = arch.child_architecture device(arch::Distributed) = device(child_architecture(arch)) -on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) + +# We do not support switching from distributed and serial through `on_architecture`. +# We only support moving a type from CPU to GPU and the other way around +on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) zeros(FT, arch::Distributed, N...) = zeros(FT, child_architecture(arch), N...) array_type(arch::Distributed) = array_type(child_architecture(arch)) sync_device!(arch::Distributed) = sync_device!(arch.child_architecture) From 6787bf8b31bd01c32426705138c48c7a0ca55f1b Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:26:51 -0500 Subject: [PATCH 11/30] export AbstractSerialArchitecture --- src/Architectures.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 52e47c2d4c..25e4334076 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -1,7 +1,7 @@ module Architectures -export AbstractArchitecture -export CPU, GPU, MultiGPU +export AbstractArchitecture, AbstractSerialArchitecture +export CPU, GPU export device, architecture, array_type, on_architecture, unified_array, device_copy_to! using CUDA From db94bdd0f70933b5bec3a1b92f1c0b3c9f25fa5e Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:26:13 -0500 Subject: [PATCH 12/30] import on_architecture in OutputReaders --- src/OutputReaders/field_time_series.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/OutputReaders/field_time_series.jl b/src/OutputReaders/field_time_series.jl index 9d015223e7..af6a34fb46 100644 --- a/src/OutputReaders/field_time_series.jl +++ b/src/OutputReaders/field_time_series.jl @@ -20,7 +20,7 @@ using Oceananigans.Fields: interior_view_indices, index_binary_search, using Oceananigans.Units: Time using Oceananigans.Utils: launch! -import Oceananigans.Architectures: architecture +import Oceananigans.Architectures: architecture, on_architecture import Oceananigans.BoundaryConditions: fill_halo_regions!, BoundaryCondition, getbc import Oceananigans.Fields: Field, set!, interior, indices, interpolate! From a90c600bc17324ac8ccfd0ff32b1791138af8cd3 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:56:55 -0500 Subject: [PATCH 13/30] still export arch_array --- src/Architectures.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 25e4334076..4a0964d6b5 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -2,7 +2,8 @@ module Architectures export AbstractArchitecture, AbstractSerialArchitecture export CPU, GPU -export device, architecture, array_type, on_architecture, unified_array, device_copy_to! +export device, architecture, unified_array, device_copy_to! +export array_type, on_architecture, arch_array using CUDA using KernelAbstractions @@ -127,7 +128,6 @@ end @inline convert_args(::GPU, args::Tuple) = map(CUDA.cudaconvert, args) # Deprecated functions - function arch_array(arch, arr) @warn "`arch_array` is deprecated. Use `on_architecture` instead." return on_architecture(arch, arr) From 62180dc178556a36cbcc0b37b0fb06211b6318be Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 5 Mar 2024 10:06:34 -0500 Subject: [PATCH 14/30] Update src/AbstractOperations/grid_metrics.jl Co-authored-by: Navid C. Constantinou --- src/AbstractOperations/grid_metrics.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AbstractOperations/grid_metrics.jl b/src/AbstractOperations/grid_metrics.jl index 822c6df173..a04dcb50b1 100644 --- a/src/AbstractOperations/grid_metrics.jl +++ b/src/AbstractOperations/grid_metrics.jl @@ -131,8 +131,8 @@ Adapt.adapt_structure(to, gm::GridMetricOperation{LX, LY, LZ}) where {LX, LY, LZ Adapt.adapt(to, gm.grid)) on_architecture(to, gm::GridMetricOperation{LX, LY, LZ}) where {LX, LY, LZ} = - GridMetricOperation{LX, LY, LZ}(on_architecture(to, gm.metric), - on_architecture(to, gm.grid)) + GridMetricOperation{LX, LY, LZ}(on_architecture(to, gm.metric), + on_architecture(to, gm.grid)) @inline Base.getindex(gm::GridMetricOperation, i, j, k) = gm.metric(i, j, k, gm.grid) From c5e68a62e2428619126cb43ebac56d1069b0732b Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 5 Mar 2024 10:06:48 -0500 Subject: [PATCH 15/30] Update src/Architectures.jl Co-authored-by: Navid C. Constantinou --- src/Architectures.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 4a0964d6b5..75454e88c5 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -74,8 +74,9 @@ on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(ar # On architecture for array types on_architecture(::CPU, a::Array) = a -on_architecture(::CPU, a::CuArray) = Array(a) on_architecture(::GPU, a::Array) = CuArray(a) + +on_architecture(::CPU, a::CuArray) = Array(a) on_architecture(::GPU, a::CuArray) = a on_architecture(::CPU, a::BitArray) = a From cbedd8882211993a89dee01af3b7efe65046a466 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 5 Mar 2024 10:07:01 -0500 Subject: [PATCH 16/30] Update src/Architectures.jl Co-authored-by: Navid C. Constantinou --- src/Architectures.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 75454e88c5..2046ec8b4f 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -70,7 +70,7 @@ on_architecture(arch, a) = a # Tupled implementation on_architecture(arch, t::Tuple{}) = () on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) -on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) +on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) # On architecture for array types on_architecture(::CPU, a::Array) = a From f2da3291b35f2660e73be2b33e141549f6bb8f77 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 5 Mar 2024 10:07:16 -0500 Subject: [PATCH 17/30] Update src/Advection/weno_reconstruction.jl Co-authored-by: Navid C. Constantinou --- src/Advection/weno_reconstruction.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Advection/weno_reconstruction.jl b/src/Advection/weno_reconstruction.jl index 0ad255a68d..49e00f9a0b 100644 --- a/src/Advection/weno_reconstruction.jl +++ b/src/Advection/weno_reconstruction.jl @@ -161,12 +161,12 @@ Adapt.adapt_structure(to, scheme::WENO{N, FT, XT, YT, ZT, WF, PP}) where {N, FT, Adapt.adapt(to, scheme.advecting_velocity_scheme)) on_architecture(to, scheme::WENO{N, FT, XT, YT, ZT, WF, PP}) where {N, FT, XT, YT, ZT, WF, PP} = - WENO{N, FT, WF}(on_architecture(to, scheme.coeff_xᶠᵃᵃ), on_architecture(to, scheme.coeff_xᶜᵃᵃ), - on_architecture(to, scheme.coeff_yᵃᶠᵃ), on_architecture(to, scheme.coeff_yᵃᶜᵃ), - on_architecture(to, scheme.coeff_zᵃᵃᶠ), on_architecture(to, scheme.coeff_zᵃᵃᶜ), - on_architecture(to, scheme.bounds), - on_architecture(to, scheme.buffer_scheme), - on_architecture(to, scheme.advecting_velocity_scheme)) + WENO{N, FT, WF}(on_architecture(to, scheme.coeff_xᶠᵃᵃ), on_architecture(to, scheme.coeff_xᶜᵃᵃ), + on_architecture(to, scheme.coeff_yᵃᶠᵃ), on_architecture(to, scheme.coeff_yᵃᶜᵃ), + on_architecture(to, scheme.coeff_zᵃᵃᶠ), on_architecture(to, scheme.coeff_zᵃᵃᶜ), + on_architecture(to, scheme.bounds), + on_architecture(to, scheme.buffer_scheme), + on_architecture(to, scheme.advecting_velocity_scheme)) # Retrieve precomputed coefficients (+2 for julia's 1 based indices) @inline retrieve_coeff(scheme::WENO, r, ::Val{1}, i, ::Type{Face}) = @inbounds scheme.coeff_xᶠᵃᵃ[r+2][i] From c1d0423b348000070843818b828ce1f05639667d Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 5 Mar 2024 10:08:03 -0500 Subject: [PATCH 18/30] Update src/Architectures.jl Co-authored-by: Navid C. Constantinou --- src/Architectures.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index 2046ec8b4f..1ede1f515c 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -68,7 +68,6 @@ array_type(::GPU) = CuArray on_architecture(arch, a) = a # Tupled implementation -on_architecture(arch, t::Tuple{}) = () on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) From 1a7debe4a917d15ef833ac7057559c6ade76d843 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:35:24 -0500 Subject: [PATCH 19/30] some changes to distributed on_architecture --- src/Architectures.jl | 8 ++-- .../distributed_architectures.jl | 3 -- .../distributed_on_architecture.jl | 44 +++++++++++++++++++ 3 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 src/DistributedComputations/distributed_on_architecture.jl diff --git a/src/Architectures.jl b/src/Architectures.jl index 4a0964d6b5..10e048f37a 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -68,9 +68,8 @@ array_type(::GPU) = CuArray on_architecture(arch, a) = a # Tupled implementation -on_architecture(arch, t::Tuple{}) = () -on_architecture(arch, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) -on_architecture(arch, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) +on_architecture(arch::AbstractSerialArchitecture, t::Tuple) = Tuple(on_architecture(arch, elem) for elem in t) +on_architecture(arch::AbstractSerialArchitecture, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) # On architecture for array types on_architecture(::CPU, a::Array) = a @@ -87,8 +86,7 @@ on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:CuArray}) = Array(a) on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:Array}) = a -on_architecture(arch::CPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) -on_architecture(arch::GPU, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) +on_architecture(arch::AbstractSerialArchitecture, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) cpu_architecture(::CPU) = CPU() cpu_architecture(::GPU) = CPU() diff --git a/src/DistributedComputations/distributed_architectures.jl b/src/DistributedComputations/distributed_architectures.jl index 2aa393c14b..73ab013355 100644 --- a/src/DistributedComputations/distributed_architectures.jl +++ b/src/DistributedComputations/distributed_architectures.jl @@ -262,9 +262,6 @@ const SynchronizedDistributed = Distributed{<:Any, true} child_architecture(arch::Distributed) = arch.child_architecture device(arch::Distributed) = device(child_architecture(arch)) -# We do not support switching from distributed and serial through `on_architecture`. -# We only support moving a type from CPU to GPU and the other way around -on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) zeros(FT, arch::Distributed, N...) = zeros(FT, child_architecture(arch), N...) array_type(arch::Distributed) = array_type(child_architecture(arch)) sync_device!(arch::Distributed) = sync_device!(arch.child_architecture) diff --git a/src/DistributedComputations/distributed_on_architecture.jl b/src/DistributedComputations/distributed_on_architecture.jl new file mode 100644 index 0000000000..15ceed1e62 --- /dev/null +++ b/src/DistributedComputations/distributed_on_architecture.jl @@ -0,0 +1,44 @@ +import Oceananigans.Architectures: on_architecture + +# We do not support switching from distributed and serial through `on_architecture`. +# We only support moving a type from CPU to GPU and the other way around +# TODO: support changing the number of ranks / the partitioning? +on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) + +function on_architecture(new_arch::Distributed, old_grid::LatitudeLongitudeGrid) + child_arch = child_architecture(new_arch) + old_properties = (old_grid.Δλᶠᵃᵃ, old_grid.Δλᶜᵃᵃ, old_grid.λᶠᵃᵃ, old_grid.λᶜᵃᵃ, + old_grid.Δφᵃᶠᵃ, old_grid.Δφᵃᶜᵃ, old_grid.φᵃᶠᵃ, old_grid.φᵃᶜᵃ, + old_grid.Δzᵃᵃᶠ, old_grid.Δzᵃᵃᶜ, old_grid.zᵃᵃᶠ, old_grid.zᵃᵃᶜ, + old_grid.Δxᶠᶜᵃ, old_grid.Δxᶜᶠᵃ, old_grid.Δxᶠᶠᵃ, old_grid.Δxᶜᶜᵃ, + old_grid.Δyᶠᶜᵃ, old_grid.Δyᶜᶠᵃ, + old_grid.Azᶠᶜᵃ, old_grid.Azᶜᶠᵃ, old_grid.Azᶠᶠᵃ, old_grid.Azᶜᶜᵃ) + + new_properties = Tuple(on_architecture(child_arch, p) for p in old_properties) + + TX, TY, TZ = topology(old_grid) + + return LatitudeLongitudeGrid{TX, TY, TZ}(new_arch, + old_grid.Nx, old_grid.Ny, old_grid.Nz, + old_grid.Hx, old_grid.Hy, old_grid.Hz, + old_grid.Lx, old_grid.Ly, old_grid.Lz, + new_properties..., + old_grid.radius) +end + +function on_architecture(new_arch::Distributed, old_grid::RectilinearGrid) + child_arch = child_architecture(new_arch) + old_properties = (old_grid.Δxᶠᵃᵃ, old_grid.Δxᶜᵃᵃ, old_grid.xᶠᵃᵃ, old_grid.xᶜᵃᵃ, + old_grid.Δyᵃᶠᵃ, old_grid.Δyᵃᶜᵃ, old_grid.yᵃᶠᵃ, old_grid.yᵃᶜᵃ, + old_grid.Δzᵃᵃᶠ, old_grid.Δzᵃᵃᶜ, old_grid.zᵃᵃᶠ, old_grid.zᵃᵃᶜ) + + new_properties = Tuple(on_architecture(child_arch, p) for p in old_properties) + + TX, TY, TZ = topology(old_grid) + + return RectilinearGrid{TX, TY, TZ}(new_arch, + old_grid.Nx, old_grid.Ny, old_grid.Nz, + old_grid.Hx, old_grid.Hy, old_grid.Hz, + old_grid.Lx, old_grid.Ly, old_grid.Lz, + new_properties...) +end From d6fbc1f0fb3ec577c153dfd2b7b4408df71ff97a Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:36:23 -0500 Subject: [PATCH 20/30] remove using on_architecture --- src/Advection/Advection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Advection/Advection.jl b/src/Advection/Advection.jl index d949070438..b8d7d96469 100644 --- a/src/Advection/Advection.jl +++ b/src/Advection/Advection.jl @@ -32,7 +32,7 @@ using OffsetArrays using Oceananigans.Grids using Oceananigans.Grids: with_halo, coordinates -using Oceananigans.Architectures: on_architecture, architecture, CPU +using Oceananigans.Architectures: architecture, CPU using Oceananigans.Operators From 9672946506f7f5c21066b8f25e0a8e5553ecee5f Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:37:41 -0500 Subject: [PATCH 21/30] remove wrong docstring --- src/AbstractOperations/derivatives.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/AbstractOperations/derivatives.jl b/src/AbstractOperations/derivatives.jl index 751e794b94..ee38f48fc8 100644 --- a/src/AbstractOperations/derivatives.jl +++ b/src/AbstractOperations/derivatives.jl @@ -118,7 +118,7 @@ compute_at!(∂::Derivative, time) = compute_at!(∂.arg, time) ##### GPU capabilities ##### -"Adapt `Derivative` to work on the GPU via CUDAnative and CUDAdrv." +"Adapt `Derivative` to work on the GPU." Adapt.adapt_structure(to, deriv::Derivative{LX, LY, LZ}) where {LX, LY, LZ} = Derivative{LX, LY, LZ}(Adapt.adapt(to, deriv.∂), Adapt.adapt(to, deriv.arg), @@ -126,7 +126,6 @@ Adapt.adapt_structure(to, deriv::Derivative{LX, LY, LZ}) where {LX, LY, LZ} = nothing, Adapt.adapt(to, deriv.grid)) -"Adapt `Derivative` to work on the GPU via CUDAnative and CUDAdrv." on_architecture(to, deriv::Derivative{LX, LY, LZ}) where {LX, LY, LZ} = Derivative{LX, LY, LZ}(on_architecture(to, deriv.∂), on_architecture(to, deriv.arg), From 41880bbc28b9509ba43a2d5acd5e469ea6746815 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:39:55 -0500 Subject: [PATCH 22/30] some more cleaning --- src/Architectures.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index d3b5f838a1..9b03fbf7fa 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -72,8 +72,8 @@ on_architecture(arch::AbstractSerialArchitecture, t::Tuple) = Tuple(on_architect on_architecture(arch::AbstractSerialArchitecture, nt::NamedTuple) = NamedTuple{keys(nt)}(on_architecture(arch, Tuple(nt))) # On architecture for array types -on_architecture(::CPU, a::Array) = a -on_architecture(::GPU, a::Array) = CuArray(a) +on_architecture(::CPU, a::Array) = a +on_architecture(::GPU, a::Array) = CuArray(a) on_architecture(::CPU, a::CuArray) = Array(a) on_architecture(::GPU, a::CuArray) = a @@ -81,11 +81,11 @@ on_architecture(::GPU, a::CuArray) = a on_architecture(::CPU, a::BitArray) = a on_architecture(::GPU, a::BitArray) = CuArray(a) -on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:CuArray}) = a on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:CuArray}) = Array(a) +on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:CuArray}) = a -on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) on_architecture(::CPU, a::SubArray{<:Any, <:Any, <:Array}) = a +on_architecture(::GPU, a::SubArray{<:Any, <:Any, <:Array}) = CuArray(a) on_architecture(arch::AbstractSerialArchitecture, a::OffsetArray) = OffsetArray(on_architecture(arch, a.parent), a.offsets...) From 92f2644ca2b5be5572bc53219885daec58bd9027 Mon Sep 17 00:00:00 2001 From: "Navid C. Constantinou" Date: Tue, 5 Mar 2024 22:05:38 +0200 Subject: [PATCH 23/30] include("distributed_on_architecture.jl") --- src/DistributedComputations/DistributedComputations.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DistributedComputations/DistributedComputations.jl b/src/DistributedComputations/DistributedComputations.jl index 70dbc90992..f80cd7973e 100644 --- a/src/DistributedComputations/DistributedComputations.jl +++ b/src/DistributedComputations/DistributedComputations.jl @@ -12,6 +12,7 @@ using Oceananigans.Utils using Oceananigans.Grids include("distributed_architectures.jl") +include("distributed_on_architecture.jl") include("partition_assemble.jl") include("distributed_grids.jl") include("distributed_kernel_launching.jl") From 3e4c6c2257824281fae96a3cd86be11695f45c40 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Tue, 5 Mar 2024 15:35:34 -0500 Subject: [PATCH 24/30] add distributed_on_architecture --- src/DistributedComputations/DistributedComputations.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DistributedComputations/DistributedComputations.jl b/src/DistributedComputations/DistributedComputations.jl index 70dbc90992..b1e224c607 100644 --- a/src/DistributedComputations/DistributedComputations.jl +++ b/src/DistributedComputations/DistributedComputations.jl @@ -14,6 +14,7 @@ using Oceananigans.Grids include("distributed_architectures.jl") include("partition_assemble.jl") include("distributed_grids.jl") +include("distributed_on_architecture.jl") include("distributed_kernel_launching.jl") include("halo_communication_bcs.jl") include("distributed_fields.jl") From 362a26853ecce67fd665307166d72c8090a2caff Mon Sep 17 00:00:00 2001 From: "Navid C. Constantinou" Date: Wed, 6 Mar 2024 13:10:10 +0200 Subject: [PATCH 25/30] bump patch release --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2004614377..2353752cc1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Oceananigans" uuid = "9e8cae18-63c1-5223-a75c-80ca9d6e9a09" authors = ["Climate Modeling Alliance and contributors"] -version = "0.90.9" +version = "0.90.10" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 059a39d0f80b7c12b6a379369cf62ca580d79371 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:46:27 -0500 Subject: [PATCH 26/30] bugfix --- src/Fields/field.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fields/field.jl b/src/Fields/field.jl index 288a07f6ec..27bf1c2c3c 100644 --- a/src/Fields/field.jl +++ b/src/Fields/field.jl @@ -418,7 +418,7 @@ total_size(f::Field) = total_size(f.grid, location(f), f.indices) ##### Move Fields between architectures ##### -on_architecture(arch, field::AbstractField) = +on_architecture(arch, field::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = Field{LX, LY, LZ}(on_architecture(arch, field.grid), on_architecture(arch, data), on_architecture(arch, bcs), From f34327e31f2309273a054f04a8eb85b25f838109 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:06:19 -0500 Subject: [PATCH 27/30] correct distributed on_architecture --- .../distributed_on_architecture.jl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/DistributedComputations/distributed_on_architecture.jl b/src/DistributedComputations/distributed_on_architecture.jl index 15ceed1e62..ca780cf48a 100644 --- a/src/DistributedComputations/distributed_on_architecture.jl +++ b/src/DistributedComputations/distributed_on_architecture.jl @@ -1,9 +1,19 @@ -import Oceananigans.Architectures: on_architecture +using CUDA: CuArray +using OffsetArrays # We do not support switching from distributed and serial through `on_architecture`. # We only support moving a type from CPU to GPU and the other way around # TODO: support changing the number of ranks / the partitioning? -on_architecture(arch::Distributed, A) = on_architecture(child_architecture(arch), A) + +# Disambiguation for methods defined in `src/Architectures.jl` +DisambiguationTypes = Union{Array, + CuArray, + BitArray, + SubArray{<:Any, <:Any, <:CuArray}, + SubArray{<:Any, <:Any, <:Array}, + OffsetArray} + +on_architecture(::Distributed, a::DisambiguationTypes) = on_architecture(child_architecture(arch), a) function on_architecture(new_arch::Distributed, old_grid::LatitudeLongitudeGrid) child_arch = child_architecture(new_arch) From fcb6d9f447e89104596bdb075fac79a6380aedd1 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:08:08 -0500 Subject: [PATCH 28/30] remove double include --- src/DistributedComputations/DistributedComputations.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DistributedComputations/DistributedComputations.jl b/src/DistributedComputations/DistributedComputations.jl index 22d6cff7bb..b1e224c607 100644 --- a/src/DistributedComputations/DistributedComputations.jl +++ b/src/DistributedComputations/DistributedComputations.jl @@ -12,7 +12,6 @@ using Oceananigans.Utils using Oceananigans.Grids include("distributed_architectures.jl") -include("distributed_on_architecture.jl") include("partition_assemble.jl") include("distributed_grids.jl") include("distributed_on_architecture.jl") From bd3af47a139577631082786411be325fd4dfe46d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:22:47 -0500 Subject: [PATCH 29/30] hopefully last bugfix --- src/DistributedComputations/distributed_on_architecture.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DistributedComputations/distributed_on_architecture.jl b/src/DistributedComputations/distributed_on_architecture.jl index ca780cf48a..69c95ba455 100644 --- a/src/DistributedComputations/distributed_on_architecture.jl +++ b/src/DistributedComputations/distributed_on_architecture.jl @@ -13,7 +13,7 @@ DisambiguationTypes = Union{Array, SubArray{<:Any, <:Any, <:Array}, OffsetArray} -on_architecture(::Distributed, a::DisambiguationTypes) = on_architecture(child_architecture(arch), a) +on_architecture(arch::Distributed, a::DisambiguationTypes) = on_architecture(child_architecture(arch), a) function on_architecture(new_arch::Distributed, old_grid::LatitudeLongitudeGrid) child_arch = child_architecture(new_arch) From 3d469d23e8265f4be27f5b6a4a10ed7c7e852f74 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:23:17 -0500 Subject: [PATCH 30/30] adding a couple of methods --- src/DistributedComputations/distributed_on_architecture.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/DistributedComputations/distributed_on_architecture.jl b/src/DistributedComputations/distributed_on_architecture.jl index 69c95ba455..1564547a59 100644 --- a/src/DistributedComputations/distributed_on_architecture.jl +++ b/src/DistributedComputations/distributed_on_architecture.jl @@ -11,7 +11,9 @@ DisambiguationTypes = Union{Array, BitArray, SubArray{<:Any, <:Any, <:CuArray}, SubArray{<:Any, <:Any, <:Array}, - OffsetArray} + OffsetArray, + Tuple, + NamedTuple} on_architecture(arch::Distributed, a::DisambiguationTypes) = on_architecture(child_architecture(arch), a)