Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proof of Concept] Remove all exceptions from fluid-fluid kernel #79

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using Reexport: @reexport

using Adapt: Adapt
using Atomix: Atomix
using Base: @propagate_inbounds
using GPUArraysCore: AbstractGPUArray
using KernelAbstractions: KernelAbstractions, @kernel, @index
using LinearAlgebra: dot
Expand Down
4 changes: 2 additions & 2 deletions src/cell_lists/full_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,15 @@ function each_cell_index(cell_list::FullGridCellList{Nothing})
error("`search_radius` is not defined for this cell list")
end

@inline function cell_index(cell_list::FullGridCellList, cell::Tuple)
@propagate_inbounds function cell_index(cell_list::FullGridCellList, cell::Tuple)
(; linear_indices) = cell_list

return linear_indices[cell...]
end

@inline cell_index(::FullGridCellList, cell::Integer) = cell

@inline function Base.getindex(cell_list::FullGridCellList, cell)
@propagate_inbounds function Base.getindex(cell_list::FullGridCellList, cell)
(; cells) = cell_list

return cells[cell_index(cell_list, cell)]
Expand Down
21 changes: 18 additions & 3 deletions src/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,13 @@ end

@inline function foreach_point_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, points, parallel::Val{true})
# Explicit bounds check before the hot loop (or GPU kernel)
@boundscheck checkbounds(system_coords, ndims(neighborhood_search))

@threaded system_coords for point in points
foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point)
# Now we can assume that `point` is inbounds
@inbounds foreach_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, point)
end

return nothing
Expand All @@ -175,17 +180,27 @@ end
@inline function foreach_point_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, points,
backend::ParallelizationBackend)
# Explicit bounds check before the hot loop (or GPU kernel)
@boundscheck checkbounds(system_coords, ndims(neighborhood_search))

@threaded backend for point in points
foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point)
# Now we can assume that `point` is inbounds
@inbounds foreach_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, point)
end

return nothing
end

@inline function foreach_point_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, points, parallel::Val{false})
# Explicit bounds check before the hot loop
@boundscheck checkbounds(system_coords, ndims(neighborhood_search))

for point in points
foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point)
# Now we can assume that `point` is inbounds
@inbounds foreach_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, point)
end

return nothing
Expand Down
33 changes: 26 additions & 7 deletions src/nhs_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -337,19 +337,38 @@
return neighborhood_search
end

@inline function foreach_neighbor(f, system_coords, neighbor_system_coords,
neighborhood_search::GridNeighborhoodSearch, point;
search_radius = search_radius(neighborhood_search))
@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_system_coords,
neighborhood_search::GridNeighborhoodSearch,
point;
search_radius = search_radius(neighborhood_search))
# Due to https://github.com/JuliaLang/julia/issues/30411, we cannot just remove
# a `@boundscheck` by calling this function with `@inbounds` because it has a kwarg.
# We have to use `@propagate_inbounds`, which will also remove boundschecks
# in the neighbor loop, which is not safe (see comment below).
# To avoid this, we have to use a functio barrier to disable the `@inbounds` again.

Check warning on line 348 in src/nhs_grid.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"functio" should be "function".
point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point)

__foreach_neighbor(f, system_coords, neighbor_system_coords, neighborhood_search,
point, point_coords, search_radius)
end

@inline function __foreach_neighbor(f, system_coords, neighbor_system_coords,
neighborhood_search::GridNeighborhoodSearch,
point, point_coords, search_radius)
(; periodic_box) = neighborhood_search

point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point)
cell = cell_coords(point_coords, neighborhood_search)

for neighbor_cell_ in neighboring_cells(cell, neighborhood_search)
neighbor_cell = Tuple(neighbor_cell_)
neighbors = @inbounds points_in_cell(neighbor_cell, neighborhood_search)

for neighbor_ in eachindex(neighbors)
neighbor = @inbounds neighbors[neighbor_]

for neighbor in points_in_cell(neighbor_cell, neighborhood_search)
neighbor_coords = extract_svector(neighbor_system_coords,
# Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100.
# But we don't know if `neighbor` (extracted from the cell list) is in bounds.
neighbor_coords = @inbounds extract_svector(neighbor_system_coords,
Val(ndims(neighborhood_search)), neighbor)

pos_diff = point_coords - neighbor_coords
Expand Down Expand Up @@ -386,7 +405,7 @@
for cell in neighboring_cells(cell, neighborhood_search))
end

@inline function points_in_cell(cell_index, neighborhood_search)
@propagate_inbounds function points_in_cell(cell_index, neighborhood_search)
(; cell_list) = neighborhood_search

return cell_list[periodic_cell_index(cell_index, neighborhood_search)]
Expand Down
21 changes: 16 additions & 5 deletions src/util.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Return the `i`-th column of the array `A` as an `SVector`.
@inline function extract_svector(A, ::Val{NDIMS}, i) where {NDIMS}
return SVector(ntuple(@inline(dim->A[dim, i]), NDIMS))
# Explicit bounds check, which can be removed by calling this function with `@inbounds`
@boundscheck checkbounds(A, NDIMS, i)

# Assume inbounds access now
return SVector(ntuple(@inline(dim -> @inbounds A[dim, i]), NDIMS))
end

# When particles end up with coordinates so big that the cell coordinates
Expand All @@ -13,13 +17,20 @@ end
# If we threw an error here, we would prevent the time integration method from
# retrying with a smaller time step, and we would thus crash perfectly fine simulations.
@inline function floor_to_int(i)
if isnan(i) || i > typemax(Int)
# `Base.floor(Int, i)` is defined as `trunc(Int, round(x, RoundDown))`
rounded = round(i, RoundDown)

# `Base.trunc(Int, x)` throws an `InexactError` in these cases, and otherwise
# returns `unsafe_trunc(Int, rounded)`.
if isnan(rounded) || rounded >= typemax(Int)
return typemax(Int)
elseif i < typemin(Int)
elseif rounded <= typemin(Int)
return typemin(Int)
end

return floor(Int, i)
# After making sure that `rounded` is in the range of `Int`,
# we can safely call `unsafe_trunc`.
return unsafe_trunc(Int, rounded)
end

abstract type AbstractThreadingBackend end
Expand Down Expand Up @@ -134,7 +145,7 @@ end
# Call the generic kernel that is defined below, which only calls a function with
# the global GPU index.
generic_kernel(backend)(ndrange = ndrange) do i
@inline f(iterator[indices[i]])
@inbounds @inline f(iterator[indices[i]])
end

KernelAbstractions.synchronize(backend)
Expand Down
3 changes: 2 additions & 1 deletion src/vector_of_vectors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ end
@inline function Base.getindex(vov::DynamicVectorOfVectors, i)
(; backend, lengths) = vov

# This is slightly faster than without explicit boundscheck and `@inbounds` below
@boundscheck checkbounds(vov, i)

return view(backend, 1:lengths[i], i)
return @inbounds view(backend, 1:lengths[i], i)
end

@inline function Base.push!(vov::DynamicVectorOfVectors, vector::AbstractVector)
Expand Down
Loading