Skip to content

Commit

Permalink
Additional backend types to support Base.Threads (#61)
Browse files Browse the repository at this point in the history
* Additional backend types to support Base.Threads

* Removed reference to `KernelAbstractions.Backend` in docstrings

* Fixed suggestions

* Consistency in docstrings

---------

Co-authored-by: Sven Berger <berger.sven@gmail.com>
  • Loading branch information
kaipartmann and svchb authored Jul 24, 2024
1 parent 2b5d3ce commit b095e4b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
export DictionaryCellList, FullGridCellList
export ParallelUpdate, SemiParallelUpdate, SerialUpdate
export initialize!, update!, initialize_grid!, update_grid!
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
export PeriodicBox, copy_neighborhood_search

end # module PointNeighbors
17 changes: 13 additions & 4 deletions src/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ in this case to avoid unnecessary updates.
The first flag in `points_moving` indicates if points in `x` are moving.
The second flag indicates if points in `y` are moving.
!!! warning "Experimental Feature: Backend Specification"
The keyword argument `parallelization_backend` allows users to specify the
multithreading backend. This feature is currently considered experimental!
Possible parallelization backends are:
- [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic`
- [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static`
- [`PolyesterBackend`](@ref) to use `Polyester.@batch`
- `KernelAbstractions.Backend` to launch a GPU kernel
See also [`initialize!`](@ref).
"""
@inline function update!(search::AbstractNeighborhoodSearch, x, y;
Expand Down Expand Up @@ -130,7 +140,7 @@ Note that `system_coords` and `neighbor_coords` can be identical.
See also [`initialize!`](@ref), [`update!`](@ref).
"""
function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search;
parallel::Union{Bool, KernelAbstractions.Backend} = true,
parallel::Union{Bool, ParallelizationBackend} = true,
points = axes(system_coords, 2)) where {T}
# The type annotation above is to make Julia specialize on the type of the function.
# Otherwise, unspecialized code will cause a lot of allocations
Expand All @@ -141,8 +151,7 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho
# threaded loop with `Polyester.@batch`, or, when `system_coords` is a GPU array,
# launch the loop as a kernel on the GPU.
parallel_ = Val(parallel)
elseif parallel isa KernelAbstractions.Backend
# WARNING! Undocumented, experimental feature:
elseif parallel isa ParallelizationBackend
# When a parallelization backend is passed, run the loop on this backend: threaded on
# the CPU for the threading backends, or as a GPU kernel for a
# `KernelAbstractions.Backend`. The latter is useful to test the GPU code on the CPU by
# passing `parallel = KernelAbstractions.CPU()`, even though `system_coords isa Array`.
Expand All @@ -165,7 +174,7 @@ end
# When a parallelization backend is passed, run the loop with `@threaded` on this backend
# (threaded on the CPU, or as a GPU kernel for a `KernelAbstractions.Backend`)
@inline function foreach_point_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, points,
backend::KernelAbstractions.Backend)
backend::ParallelizationBackend)
@threaded backend for point in points
foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point)
end
Expand Down
56 changes: 53 additions & 3 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,35 @@ end
return floor(Int, i)
end

# Common supertype of the multithreading backends that can be passed
# as first argument to the `@threaded` macro.
abstract type AbstractThreadingBackend end

"""
    PolyesterBackend()

Backend selecting `Polyester.@batch` as the multithreading implementation.
Pass an instance as first argument to the [`@threaded`](@ref) macro to run the loop
with `Polyester.@batch`.
"""
struct PolyesterBackend <: AbstractThreadingBackend
end

"""
    ThreadsDynamicBackend()

Backend selecting `Threads.@threads :dynamic` as the multithreading implementation.
Pass an instance as first argument to the [`@threaded`](@ref) macro to run the loop
with `Threads.@threads :dynamic`.
"""
struct ThreadsDynamicBackend <: AbstractThreadingBackend
end

"""
    ThreadsStaticBackend()

Backend selecting `Threads.@threads :static` as the multithreading implementation.
Pass an instance as first argument to the [`@threaded`](@ref) macro to run the loop
with `Threads.@threads :static`.
"""
struct ThreadsStaticBackend <: AbstractThreadingBackend
end

# Everything that is accepted as a parallelization backend: one of the threading
# backends above or a `KernelAbstractions.Backend`.
const ParallelizationBackend = Union{AbstractThreadingBackend,
                                     KernelAbstractions.Backend}

"""
@threaded x for ... end
Expand All @@ -30,15 +59,21 @@ Semantically the same as `Threads.@threads` when iterating over an `AbstractUnitRange`
but without guarantee that the underlying implementation uses `Threads.@threads`
or works for more general `for` loops.
The first argument must either be a `KernelAbstractions.Backend` or an array from which the
backend can be derived to determine if the loop must be run threaded on the CPU
The first argument must either be a parallelization backend (see below) or an array from
which the backend can be derived to determine if the loop must be run threaded on the CPU
or launched as a kernel on the GPU. Passing `KernelAbstractions.CPU()` will run the GPU
kernel on the CPU.
Possible parallelization backends are:
- [`PolyesterBackend`](@ref) to use `Polyester.@batch`
- [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic`
- [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static`
- `KernelAbstractions.Backend` to execute the loop as a GPU kernel
In particular, the underlying threading capabilities might be provided by other packages
such as [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl).
!!! warn
!!! warning "Warning"
This macro does not necessarily work for general `for` loops. For example,
it does not necessarily support general iterables such as `eachline(filename)`.
"""
Expand All @@ -61,12 +96,27 @@ macro threaded(system, expr)
end

# Generic method accepting any `x`: call `f` on every element of `iterator` inside a
# `Polyester.@batch` loop, which provides low-overhead threading.
# This is currently the default when x::Array.
@inline function parallel_foreach(f, iterator, x)
    Polyester.@batch for index in iterator
        @inline f(index)
    end
end

# Method selected by a `ThreadsDynamicBackend`: call `f` on every element of `iterator`
# inside a `Threads.@threads :dynamic` loop. The backend is only used for dispatch.
@inline function parallel_foreach(f, iterator, ::ThreadsDynamicBackend)
    Threads.@threads :dynamic for index in iterator
        @inline f(index)
    end
end

# Method selected by a `ThreadsStaticBackend`: call `f` on every element of `iterator`
# inside a `Threads.@threads :static` loop. The backend is only used for dispatch.
@inline function parallel_foreach(f, iterator, ::ThreadsStaticBackend)
    Threads.@threads :static for index in iterator
        @inline f(index)
    end
end

# On GPUs, execute `f` inside a GPU kernel with KernelAbstractions.jl
@inline function parallel_foreach(f, iterator,
x::Union{AbstractGPUArray, KernelAbstractions.Backend})
Expand Down

0 comments on commit b095e4b

Please sign in to comment.