Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make benchmarks run on the GPU #77

Merged
merged 5 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions benchmarks/n_body.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,15 @@ This is a more realistic benchmark for particle-based simulations than
However, due to the higher computational cost, differences between neighborhood search
implementations are less pronounced.
"""
function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
mass = 1e10 * (rand(size(coordinates, 2)) .+ 1)
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
# Passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
# Passing `parallel = true` or `parallel = false` will not change anything here.
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)

# This preserves the data type of `coordinates`, which makes it work for GPU types
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
G = 6.6743e-11

dv = similar(coordinates)
Expand All @@ -36,6 +43,5 @@ function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
return dv
end

return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G,
$neighborhood_search, $parallel)
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
end
76 changes: 70 additions & 6 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,80 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)

v = vcat(fluid.velocity, fluid.density')
u = copy(fluid.coordinates)
# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(fluid_system, neighborhood_search)
TrixiParticles.compute_pressure!(fluid_system, v)
TrixiParticles.initialize!(system, nhs)
TrixiParticles.compute_pressure!(system, v)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $neighborhood_search,
$fluid_system, $fluid_system)
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates; parallel = true)

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)

# Compact support == smoothing length for the Wendland kernel
smoothing_length = convert(Float32, PointNeighbors.search_radius(neighborhood_search))
if ndims(neighborhood_search) == 1
smoothing_kernel = SchoenbergCubicSplineKernel{1}()
else
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

sound_speed = 10.0f0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

fluid_density_calculator = ContinuityDensity()
viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)

fluid_system = WeaklyCompressibleSPHSystem(fluid, fluid_density_calculator,
state_equation, smoothing_kernel,
smoothing_length, viscosity = viscosity,
acceleration = (0.0f0, 0.0f0, 0.0f0),
density_diffusion = density_diffusion)

# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(system, nhs)
TrixiParticles.compute_pressure!(system, v)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
end

"""
Expand Down