diff --git a/libs/core/algorithms/CMakeLists.txt b/libs/core/algorithms/CMakeLists.txt index 6fcfed897e2..9090345722d 100644 --- a/libs/core/algorithms/CMakeLists.txt +++ b/libs/core/algorithms/CMakeLists.txt @@ -37,7 +37,9 @@ set(algorithms_headers hpx/parallel/algorithms/detail/parallel_stable_sort.hpp hpx/parallel/algorithms/detail/pivot.hpp hpx/parallel/algorithms/detail/reduce.hpp + hpx/parallel/algorithms/detail/reduce_deterministic.hpp hpx/parallel/algorithms/detail/replace.hpp + hpx/parallel/algorithms/detail/rfa.hpp hpx/parallel/algorithms/detail/rotate.hpp hpx/parallel/algorithms/detail/sample_sort.hpp hpx/parallel/algorithms/detail/search.hpp @@ -72,6 +74,7 @@ set(algorithms_headers hpx/parallel/algorithms/partition.hpp hpx/parallel/algorithms/reduce_by_key.hpp hpx/parallel/algorithms/reduce.hpp + hpx/parallel/algorithms/reduce_deterministic.hpp hpx/parallel/algorithms/remove_copy.hpp hpx/parallel/algorithms/remove.hpp hpx/parallel/algorithms/replace.hpp diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp index 87069858f49..b3773088917 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,8 @@ namespace hpx::parallel::detail { sequential_reduce_deterministic_t, ExPolicy&&, InIterB first, InIterE last, T init, Reduce&& r) { + /// TODO: Put constraint on Reduce to be a binary plus operator + (void) r; hpx::parallel::detail::rfa::RFA_bins bins; bins.initialize_bins(); std::memcpy(rfa::__rfa_bin_host_buffer__, &bins, sizeof(bins)); diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp index 302f823fab7..9fad816c432 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp @@ -1,3 +1,19 @@ +/// Copyright 2022 Richard Barnes, Peter Ahrens, James Demmel +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to deal +/// in the Software without restriction, including without limitation the rights +/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +/// copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +/// SOFTWARE. //Reproducible Floating Point Accumulations via Binned Floating Point //Adapted to C++ by Richard Barnes from ReproBLAS v2.1.0. //ReproBLAS by Peter Ahrens, Hong Diep Nguyen, and James Demmel. @@ -26,6 +42,8 @@ #include #include #include +#include +#include namespace hpx::parallel::detail::rfa { template @@ -351,21 +369,21 @@ namespace hpx::parallel::detail::rfa { ///Get index of float-point precision ///The index of a non-binned type is the smallest index a binned type would - ///need to have to sum it reproducibly. Higher indicies correspond to smaller + ///need to have to sum it reproducibly. Higher indices correspond to smaller ///bins. static inline constexpr int binned_dindex(const ftype x) { int exp = EXP(x); if (exp == 0) { - if (x == 0.0) + if (x == static_cast(0.0)) { return MAXINDEX; } else { std::frexp(x, &exp); - return std::max((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX); + return (std::max)((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX); } } return ((MAX_EXP + EXP_BIAS) - exp) / BIN_WIDTH; @@ -373,7 +391,7 @@ namespace hpx::parallel::detail::rfa { ///Get index of manually specified binned double precision ///The index of a binned type is the bin that it corresponds to. Higher - ///indicies correspond to smaller bins. + ///indices correspond to smaller bins. inline int binned_index() const { return ((MAX_EXP + MANT_DIG - BIN_WIDTH + 1 + EXP_BIAS) - @@ -457,7 +475,7 @@ namespace hpx::parallel::detail::rfa { if (binned_index0()) { M = primary(0); - ftype qd = x * COMPRESSION; + ftype qd = x * static_cast(COMPRESSION); auto& ql = get_bits(qd); ql |= 1; qd += M; @@ -550,7 +568,7 @@ namespace hpx::parallel::detail::rfa { int i = 0; if (ISNANINF(primary(0))) - return primary(0); + return (double) primary(0); if (ISZERO(primary(0))) return 0.0; @@ -564,29 +582,36 @@ namespace hpx::parallel::detail::rfa { { scale_down = std::ldexp(0.5, 1 - (2 * MANT_DIG - BIN_WIDTH)); scale_up = std::ldexp(0.5, 1 + (2 * MANT_DIG - BIN_WIDTH)); - scaled = std::max( - std::min(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0); + scaled = (std::max)( + (std::min)(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0); if (X_index == 0) { - Y += carry(0) * ((bins[0] / 6.0) * scale_down * EXPANSION); - Y += carry(inccarX) * ((bins[1] / 6.0) * scale_down); - Y += (primary(0) - bins[0]) * scale_down * EXPANSION; + Y += ((double) carry(0)) * + ((((double) bins[0]) / 6.0) * scale_down * EXPANSION); + Y += ((double) carry(inccarX)) * + ((((double) bins[1]) / 6.0) * scale_down); + Y += ((double) primary(0) - (double) bins[0]) * scale_down * + EXPANSION; i = 2; } else { - Y += carry(0) * ((bins[0] / 6.0) * scale_down); + Y += ((double) carry(0)) * + (((double) bins[0] / 6.0) * scale_down); i = 1; } for (; i < scaled; i++) { - Y += carry(i * inccarX) * ((bins[i] / 6.0) * scale_down); - Y += - (primary((i - 1) * incpriX) - bins[i - 1]) * scale_down; + Y += ((double) carry(i * inccarX)) * + (((double) bins[i] / 6.0) * scale_down); + Y += ((double) primary((i - 1) * incpriX) - + (double) (bins[i - 1])) * + scale_down; } if (i == FOLD) { - Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]) * + Y += ((double) primary((FOLD - 1) * incpriX) - + (double) (bins[FOLD - 1])) * scale_down; return Y * scale_up; } @@ -597,20 +622,23 @@ namespace hpx::parallel::detail::rfa { Y *= scale_up; for (; i < FOLD; i++) { - Y += carry(i * inccarX) * (bins[i] / 6.0); - Y += primary((i - 1) * incpriX) - bins[i - 1]; + Y += ((double) carry(i * inccarX)) * + ((double) bins[i] / 6.0); + Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]); } - Y += primary((FOLD - 1) * incpriX) - bins[FOLD - 1]; + Y += ((double) primary((FOLD - 1) * incpriX) - + ((double) bins[FOLD - 1])); } else { - Y += carry(0) * (bins[0] / 6.0); + Y += ((double) carry(0)) * ((double) bins[0] / 6.0); for (i = 1; i < FOLD; i++) { - Y += carry(i * inccarX) * (bins[i] / 6.0); - Y += (primary((i - 1) * incpriX) - bins[i - 1]); + Y += ((double) carry(i * inccarX)) * + ((double) bins[i] / 6.0); + Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]); } - Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]); + Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]); } return Y; } @@ -627,7 +655,7 @@ namespace hpx::parallel::detail::rfa { if (ISNANINF(primary(0))) return primary(0); if (ISZERO(primary(0))) - return 0.0; + return 0.0f; //Note that the following order of summation is in order of decreasing //exponent. The following code is specific to SBWIDTH=13, FLT_MANT_DIG=24, and @@ -636,20 +664,22 @@ namespace hpx::parallel::detail::rfa { const auto* const bins = binned_bins(X_index); if (X_index == 0) { - Y += (double) carry(0) * (double) (bins[0] / 6.0) * + Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0) * (double) EXPANSION; - Y += (double) carry(inccarX) * (double) (bins[1] / 6.0); + Y += (double) carry(inccarX) * + (double) (((double) bins[1]) / 6.0); Y += (double) (primary(0) - bins[0]) * (double) EXPANSION; i = 2; } else { - Y += (double) carry(0) * (double) (bins[0] / 6.0); + Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0); i = 1; } for (; i < FOLD; i++) { - Y += (double) carry(i * inccarX) * (double) (bins[i] / 6.0); + Y += (double) carry(i * inccarX) * + (double) (((double) bins[i]) / 6.0); Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]); } Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]); @@ -867,11 +897,11 @@ namespace hpx::parallel::detail::rfa { { if (std::is_same_v) { - return binned_conv_single(1, 1); + return static_cast(binned_conv_single(1, 1)); } else { - return binned_conv_double(1, 1); + return static_cast(binned_conv_double(1, 1)); } } @@ -888,7 +918,8 @@ namespace hpx::parallel::detail::rfa { { const double X = std::abs(max_abs_val); const double S = std::abs(binned_sum); - return static_cast(max(X, std::ldexp(0.5, MIN_EXP - 1)) * + return static_cast( + (std::max)(X, std::ldexp(0.5, MIN_EXP - 1)) * std::ldexp(0.5, (1 - FOLD) * BIN_WIDTH + 1) * N + ((7.0 * EPSILON) / (1.0 - 6.0 * std::sqrt(static_cast(EPSILON)) - @@ -973,7 +1004,7 @@ namespace hpx::parallel::detail::rfa { T max_abs_val = input[0]; for (size_t i = 0; i < N; i++) { - max_abs_val = max(max_abs_val, std::abs(input[i])); + max_abs_val = (std::max)(max_abs_val, std::abs(input[i])); } add(input, N, max_abs_val); } @@ -1142,4 +1173,4 @@ namespace hpx::parallel::detail::rfa { } }; -} // namespace hpx::parallel::detail::rfa \ No newline at end of file +} // namespace hpx::parallel::detail::rfa diff --git a/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp b/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp index 92dd2e7f3dc..d6d2a924961 100644 --- a/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp +++ b/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp @@ -4,8 +4,6 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#pragma once - #include #include #include @@ -19,6 +17,7 @@ #include #include #include +#include #include #include "test_utils.hpp" @@ -27,8 +26,8 @@ int seed = std::random_device{}(); std::mt19937 gen(seed); template -T get_rand( - T LO = std::numeric_limits::min(), T HI = std::numeric_limits::max()) +T get_rand(T LO = (std::numeric_limits::min)(), + T HI = (std::numeric_limits::max)()) { return LO + static_cast(std::rand()) / (static_cast(RAND_MAX / (HI - LO))); @@ -75,8 +74,8 @@ void test_reduce1(IteratorTag) FloatTypeNonDeterministic r3 = std::accumulate( nondeterministic.begin(), nondeterministic.end(), val_non_det); - HPX_TEST_EQ(r1, r3); - HPX_TEST_EQ(r2, r3); + HPX_TEST_EQ(static_cast(r1), r3); + HPX_TEST_EQ(static_cast(r2), r3); } template