diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl
index 35a8fd3241..a24fdccf9f 100644
--- a/src/DataLayouts/cuda.jl
+++ b/src/DataLayouts/cuda.jl
@@ -10,6 +10,9 @@ Adapt.adapt_structure(to, data::IJFH{S, Nij}) where {S, Nij} =
 Adapt.adapt_structure(to, data::VIJFH{S, Nij}) where {S, Nij} =
     VIJFH{S, Nij}(Adapt.adapt(to, parent(data)))
 
+Adapt.adapt_structure(to, data::VIFH{S, Ni}) where {S, Ni} =
+    VIFH{S, Ni}(Adapt.adapt(to, parent(data))) # rewrap the adapted parent
+
 Adapt.adapt_structure(to, data::IFH{S, Ni}) where {S, Ni} =
     IFH{S, Ni}(Adapt.adapt(to, parent(data)))
 
diff --git a/src/Limiters/quasimonotone.jl b/src/Limiters/quasimonotone.jl
index 4dafc16c02..8dead42aea 100644
--- a/src/Limiters/quasimonotone.jl
+++ b/src/Limiters/quasimonotone.jl
@@ -1,5 +1,9 @@
+using CUDA
 import ClimaComms
 import CUDA
+import ..Operators
+import ..RecursiveApply: ⊠, ⊞, ⊟, rmap, rzero, rdiv
+import Adapt
 
 """
     QuasiMonotoneLimiter
@@ -50,6 +54,12 @@ struct QuasiMonotoneLimiter{D, G, FT}
     rtol::FT
 end
 
+# Adapt bounds and ghost buffer to `to`; `rtol` is passed through unchanged.
+function Adapt.adapt_structure(to, lim::QuasiMonotoneLimiter)
+    fields = (lim.q_bounds, lim.q_bounds_nbr, lim.ghost_buffer)
+    adapted = map(field -> Adapt.adapt(to, field), fields)
+    return QuasiMonotoneLimiter(adapted..., lim.rtol)
+end
 
 function QuasiMonotoneLimiter(ρq::Fields.Field; rtol = eps(eltype(parent(ρq))))
     q_bounds = make_q_bounds(Fields.field_values(ρq))
diff --git a/test/Limiters/limiter.jl b/test/Limiters/limiter.jl
index 8333b286f7..cc1a449fa6 100644
--- a/test/Limiters/limiter.jl
+++ b/test/Limiters/limiter.jl
@@ -126,28 +126,33 @@ end
         ρq = ρ .* q
 
         limiter = Limiters.QuasiMonotoneLimiter(ρq)
-        Limiters.compute_bounds!(limiter, ρq, ρ)
+        Limiters.compute_element_bounds!(limiter, ρq, ρ)
 
         is_gpu = device isa ClimaComms.CUDADevice
         S = map(Iterators.product(1:n1, 1:n2)) do (h1, h2)
             (h1, h2, slab(limiter.q_bounds, h1 + n1 * (h2 - 1)))
         end
-        @test all(map(T -> T[3][1].x ≈ 2 * (T[1] - 1), S)) broken = is_gpu # q_min
-        @test all(map(T -> T[3][1].y ≈ 3 * (T[2] - 1), S)) broken = is_gpu # q_min
-        @test all(map(T -> T[3][2].x ≈ 2 * T[1], S)) broken = is_gpu # q_max
-        @test all(map(T -> T[3][2].y ≈ 3 * T[2], S)) broken = is_gpu # q_max
+        CUDA.@allowscalar begin # the checks index single slab entries
+            @test all(T -> T[3][1].x ≈ 2 * (T[1] - 1), S) broken = is_gpu # q_min
+            @test all(T -> T[3][1].y ≈ 3 * (T[2] - 1), S) broken = is_gpu # q_min
+            @test all(T -> T[3][2].x ≈ 2 * T[1], S) broken = is_gpu # q_max
+            @test all(T -> T[3][2].y ≈ 3 * T[2], S) broken = is_gpu # q_max
+        end
 
+        Limiters.compute_neighbor_bounds_local!(limiter, ρ)
         SN = map(Iterators.product(1:n1, 1:n2)) do (h1, h2)
             (h1, h2, slab(limiter.q_bounds_nbr, h1 + n1 * (h2 - 1)))
         end
-        @test all(map(T -> T[3][1].x ≈ 2 * max(T[1] - 2, 0), SN)) broken =
-            is_gpu # q_min
-        @test all(map(T -> T[3][1].y ≈ 3 * max(T[2] - 2, 0), SN)) broken =
-            is_gpu # q_min
-        @test all(map(T -> T[3][2].x ≈ 2 * min(T[1] + 1, n1), SN)) broken =
-            is_gpu # q_max
-        @test all(map(T -> T[3][2].y ≈ 3 * min(T[2] + 1, n2), SN)) broken =
-            is_gpu # q_max
+        CUDA.@allowscalar begin # the checks index single slab entries
+            @test all(T -> T[3][1].x ≈ 2 * max(T[1] - 2, 0), SN) broken =
+                is_gpu # q_min
+            @test all(T -> T[3][1].y ≈ 3 * max(T[2] - 2, 0), SN) broken =
+                is_gpu # q_min
+            @test all(T -> T[3][2].x ≈ 2 * min(T[1] + 1, n1), SN) broken =
+                is_gpu # q_max
+            @test all(T -> T[3][2].y ≈ 3 * min(T[2] + 1, n2), SN) broken =
+                is_gpu # q_max
+        end
     end
 end