Skip to content

Commit

Permalink
Add reproducible summation reduction
Browse files Browse the repository at this point in the history
- Currently supports sequential version of reduction
- Tests determinism by shuffling order of floating point numbers

Signed-off-by: Shreyas Atre <shreyasatre16@gmail.com>
  • Loading branch information
SAtacker committed Dec 17, 2024
1 parent 6ff0c9d commit 520f161
Show file tree
Hide file tree
Showing 5 changed files with 2,035 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) 2024 Shreyas Atre
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#pragma once

#include <hpx/config.hpp>
#include <hpx/functional/detail/tag_fallback_invoke.hpp>
#include <hpx/functional/invoke.hpp>
#include <hpx/parallel/algorithms/detail/rfa.hpp>
#include <hpx/parallel/util/loop.hpp>

#include <cmath>
#include <cstddef>
#include <cstring>
#include <limits>
#include <type_traits>
#include <utility>
#include "rfa.hpp"

namespace hpx::parallel::detail {

/// Function object type implementing the sequential version of the
/// reproducible summation reduction.
///
/// The work is done by the hidden friend \c tag_fallback_invoke below, which
/// the \c tag_fallback base is expected to dispatch to when the object is
/// called (dispatch machinery is defined outside this file).
///
/// \tparam ExPolicy  execution policy type the overload is selected for
template <typename ExPolicy>
struct sequential_reduce_deterministic_t final
  : hpx::functional::detail::tag_fallback<
        sequential_reduce_deterministic_t<ExPolicy>>
{
private:
    /// Adds \a init and every element of [\a first, \a last) into an
    /// \c rfa::ReproducibleFloatingAccumulator<T> and returns the value
    /// produced by its \c conv() member.
    ///
    /// \param first  iterator to the first element to add
    /// \param last   sentinel marking the end of the range; an empty range
    ///               yields the accumulated value of \a init alone
    /// \param init   initial value; also added to the accumulator
    /// \param r      reduction operation; currently ignored, the elements
    ///               are always added
    ///
    /// \returns the result of \c conv() on the accumulator
    ///
    /// NOTE(review): \c std::memcpy is not usable in constant expressions,
    /// so this function cannot actually be constant-evaluated; \c constexpr
    /// is kept only to leave the declaration unchanged.
    template <typename InIterB, typename InIterE, typename T,
        typename Reduce>
    friend constexpr T tag_fallback_invoke(
        sequential_reduce_deterministic_t, ExPolicy&&, InIterB first,
        InIterE last, T init, [[maybe_unused]] Reduce&& r)
    {
        // Build the bin table and copy it byte-wise into the shared host
        // buffer (presumably read by the accumulator -- confirm in rfa.hpp).
        hpx::parallel::detail::rfa::RFA_bins<T> bins;
        bins.initialize_bins();
        std::memcpy(rfa::__rfa_bin_host_buffer__, &bins, sizeof(bins));

        hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<T>
            accumulator;

        // Seed the running maximum from init rather than from *first:
        //  - *first must not be dereferenced when the range is empty;
        //  - any element (including the first one) whose magnitude exceeds
        //    what the accumulator has been told about so far must be
        //    announced through set_max_abs_val() before unsafe_add().
        T max_val = std::abs(init);
        accumulator.set_max_abs_val(init);
        accumulator.unsafe_add(init);
        accumulator.renorm();

        // Number of unsafe_add() calls since the last renorm().
        std::size_t count = 0;
        for (auto it = first; it != last; ++it)
        {
            T const magnitude = std::abs(static_cast<T>(*it));
            if (max_val < magnitude)
            {
                accumulator.set_max_abs_val(magnitude);
                max_val = magnitude;
            }
            accumulator.unsafe_add(*it);
            ++count;
            // Renormalize every endurance() additions.
            if (count == accumulator.endurance())
            {
                accumulator.renorm();
                count = 0;
            }
        }
        return accumulator.conv();
    }
};

// Makes the algorithm available under the name
// sequential_reduce_deterministic<ExPolicy>.
//
// When HPX_COMPUTE_DEVICE_CODE is not defined, the name is a variable
// template holding a sequential_reduce_deterministic_t<ExPolicy> object.
// Otherwise it is a function template that creates such an object on each
// call and forwards all arguments to it.
#if !defined(HPX_COMPUTE_DEVICE_CODE)
template <typename ExPolicy>
inline constexpr sequential_reduce_deterministic_t<ExPolicy>
    sequential_reduce_deterministic{};
#else
template <typename ExPolicy, typename... Args>
HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_reduce_deterministic(
    Args&&... args)
{
    using algorithm_type = sequential_reduce_deterministic_t<ExPolicy>;
    return algorithm_type{}(std::forward<Args>(args)...);
}
#endif

} // namespace hpx::parallel::detail
Loading

0 comments on commit 520f161

Please sign in to comment.