diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl index 35a8fd3241..a24fdccf9f 100644 --- a/src/DataLayouts/cuda.jl +++ b/src/DataLayouts/cuda.jl @@ -10,6 +10,9 @@ Adapt.adapt_structure(to, data::IJFH{S, Nij}) where {S, Nij} = Adapt.adapt_structure(to, data::VIJFH{S, Nij}) where {S, Nij} = VIJFH{S, Nij}(Adapt.adapt(to, parent(data))) +Adapt.adapt_structure(to, data::VIFH{S, Ni, A}) where {S, Ni, A} = + VIFH{S, Ni}(Adapt.adapt(to, parent(data))) + Adapt.adapt_structure(to, data::IFH{S, Ni}) where {S, Ni} = IFH{S, Ni}(Adapt.adapt(to, parent(data))) diff --git a/src/Limiters/quasimonotone.jl b/src/Limiters/quasimonotone.jl index 4dafc16c02..8dead42aea 100644 --- a/src/Limiters/quasimonotone.jl +++ b/src/Limiters/quasimonotone.jl @@ -1,5 +1,9 @@ +using CUDA import ClimaComms import CUDA +import ..Operators +import ..RecursiveApply: ⊠, ⊞, ⊟, rmap, rzero, rdiv +import Adapt """ QuasiMonotoneLimiter @@ -50,6 +54,12 @@ struct QuasiMonotoneLimiter{D, G, FT} rtol::FT end +Adapt.adapt_structure(to, lim::QuasiMonotoneLimiter) = QuasiMonotoneLimiter( + Adapt.adapt(to, lim.q_bounds), + Adapt.adapt(to, lim.q_bounds_nbr), + Adapt.adapt(to, lim.ghost_buffer), + lim.rtol, +) function QuasiMonotoneLimiter(ρq::Fields.Field; rtol = eps(eltype(parent(ρq)))) q_bounds = make_q_bounds(Fields.field_values(ρq)) diff --git a/test/Limiters/limiter.jl b/test/Limiters/limiter.jl index 8333b286f7..cc1a449fa6 100644 --- a/test/Limiters/limiter.jl +++ b/test/Limiters/limiter.jl @@ -126,28 +126,33 @@ end ρq = ρ .* q limiter = Limiters.QuasiMonotoneLimiter(ρq) - Limiters.compute_bounds!(limiter, ρq, ρ) + Limiters.compute_element_bounds!(limiter, ρq, ρ) is_gpu = device isa ClimaComms.CUDADevice S = map(Iterators.product(1:n1, 1:n2)) do (h1, h2) (h1, h2, slab(limiter.q_bounds, h1 + n1 * (h2 - 1))) end - @test all(map(T -> T[3][1].x ≈ 2 * (T[1] - 1), S)) broken = is_gpu # q_min - @test all(map(T -> T[3][1].y ≈ 3 * (T[2] - 1), S)) broken = is_gpu # q_min - @test all(map(T -> T[3][2].x ≈ 2 * T[1], S)) broken = is_gpu # q_max - @test all(map(T -> T[3][2].y ≈ 3 * T[2], S)) broken = is_gpu # q_max + CUDA.@allowscalar begin + @test all(map(T -> T[3][1].x ≈ 2 * (T[1] - 1), S)) broken = is_gpu # q_min + @test all(map(T -> T[3][1].y ≈ 3 * (T[2] - 1), S)) broken = is_gpu # q_min + @test all(map(T -> T[3][2].x ≈ 2 * T[1], S)) broken = is_gpu # q_max + @test all(map(T -> T[3][2].y ≈ 3 * T[2], S)) broken = is_gpu # q_max + end + Limiters.compute_neighbor_bounds_local!(limiter, ρ) SN = map(Iterators.product(1:n1, 1:n2)) do (h1, h2) (h1, h2, slab(limiter.q_bounds_nbr, h1 + n1 * (h2 - 1))) end - @test all(map(T -> T[3][1].x ≈ 2 * max(T[1] - 2, 0), SN)) broken = - is_gpu # q_min - @test all(map(T -> T[3][1].y ≈ 3 * max(T[2] - 2, 0), SN)) broken = - is_gpu # q_min - @test all(map(T -> T[3][2].x ≈ 2 * min(T[1] + 1, n1), SN)) broken = - is_gpu # q_max - @test all(map(T -> T[3][2].y ≈ 3 * min(T[2] + 1, n2), SN)) broken = - is_gpu # q_max + CUDA.@allowscalar begin + @test all(map(T -> T[3][1].x ≈ 2 * max(T[1] - 2, 0), SN)) broken = + is_gpu # q_min + @test all(map(T -> T[3][1].y ≈ 3 * max(T[2] - 2, 0), SN)) broken = + is_gpu # q_min + @test all(map(T -> T[3][2].x ≈ 2 * min(T[1] + 1, n1), SN)) broken = + is_gpu # q_max + @test all(map(T -> T[3][2].y ≈ 3 * min(T[2] + 1, n2), SN)) broken = + is_gpu # q_max + end end end