Skip to content

Commit

Permalink
Revert "[SYCL] Implement sub_group_mask version 2 (intel#11195)" (int…
Browse files Browse the repository at this point in the history
…el#12239)

This reverts commit 3bd09b9.

Reverting as it affects performance. Will re-introduce later.
  • Loading branch information
KornevNikita authored Dec 22, 2023
1 parent 04c4317 commit fb8c82d
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 179 deletions.
4 changes: 3 additions & 1 deletion sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ get_tangle_group(Group group) {
// TODO: Construct from compiler-generated mask. Return an invalid group in
// in the meantime. CUDA devices will report false for the tangle_group
// support aspect so kernels launch should ensure this is never run.
return tangle_group<sycl::sub_group>(0);
return tangle_group<sycl::sub_group>(
sycl::detail::Builder::createSubGroupMask<
sycl::ext::oneapi::sub_group_mask>(0, 0));
#endif
#else
throw runtime_error("Non-uniform groups are not supported on host device.",
Expand Down
76 changes: 21 additions & 55 deletions sycl/include/sycl/ext/oneapi/sub_group_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

#include <sycl/detail/helpers.hpp> // for Builder
#include <sycl/detail/memcpy.hpp> // detail::memcpy
#include <sycl/detail/type_traits.hpp> // for is_sub_group
#include <sycl/exception.hpp> // for errc, exception
#include <sycl/feature_test.hpp> // for SYCL_EXT_ONEAPI_SUB_GROUP_MASK
#include <sycl/id.hpp> // for id
#include <sycl/marray.hpp> // for marray
#include <sycl/types.hpp> // for vec
Expand All @@ -35,26 +35,25 @@ template <typename Group> struct group_scope;

} // namespace detail

// forward decalre sycl::sub_group
struct sub_group;

namespace ext::oneapi {

// forward decalre sycl::ext::oneapi::sub_group
struct sub_group;
#if defined(__SYCL_DEVICE_ONLY__) && defined(__AMDGCN__) && \
(__AMDGCN_WAVEFRONT_SIZE == 64)
#define BITS_TYPE uint64_t
#else
#define BITS_TYPE uint32_t
#endif

// defining `group_ballot` here to make predicate default `true`
// need to forward declare sub_group_mask first
struct sub_group_mask;
template <typename Group>
std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group> ||
std::is_same_v<std::decay_t<Group>, sycl::sub_group>,
sub_group_mask>
std::enable_if_t<sycl::detail::is_sub_group<Group>::value, sub_group_mask>
group_ballot(Group g, bool predicate = true);

struct sub_group_mask {
friend class sycl::detail::Builder;
using BitsType = uint64_t;
using BitsType = BITS_TYPE;

static constexpr size_t max_bits =
sizeof(BitsType) * CHAR_BIT /* implementation-defined */;
Expand Down Expand Up @@ -82,8 +81,7 @@ struct sub_group_mask {
}

reference(sub_group_mask &gmask, size_t pos) : Ref(gmask.Bits) {
BitsType one = 1;
RefBit = (pos < gmask.bits_num) ? (one << pos) : 0;
RefBit = (pos < gmask.bits_num) ? (1UL << pos) : 0;
}

private:
Expand All @@ -93,36 +91,8 @@ struct sub_group_mask {
BitsType RefBit;
};

#if SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 2
sub_group_mask() : sub_group_mask(0, GetMaxLocalRangeSize()){};

sub_group_mask(unsigned long long val)
: sub_group_mask(0, GetMaxLocalRangeSize()) {
Bits = val;
};

template <typename T, std::size_t K,
typename = std::enable_if_t<std::is_integral_v<T>>>
sub_group_mask(const sycl::marray<T, K> &val)
: sub_group_mask(0, GetMaxLocalRangeSize()) {
for (size_t I = 0, BytesCopied = 0; I < K && BytesCopied < sizeof(Bits);
++I) {
size_t RemainingBytes = sizeof(Bits) - BytesCopied;
size_t BytesToCopy =
RemainingBytes < sizeof(T) ? RemainingBytes : sizeof(T);
sycl::detail::memcpy(reinterpret_cast<char *>(&Bits) + BytesCopied,
&val[I], BytesToCopy);
BytesCopied += BytesToCopy;
}
}

sub_group_mask(const sub_group_mask &other) = default;
sub_group_mask &operator=(const sub_group_mask &other) = default;
#endif // SYCL_EXT_ONEAPI_SUB_GROUP_MASK

bool operator[](id<1> id) const {
BitsType one = 1;
return (Bits & ((id.get(0) < bits_num) ? (one << id.get(0)) : 0));
return (Bits & ((id.get(0) < bits_num) ? (1UL << id.get(0)) : 0));
}

reference operator[](id<1> id) { return {*this, id.get(0)}; }
Expand Down Expand Up @@ -284,6 +254,10 @@ struct sub_group_mask {
return Tmp;
}

sub_group_mask(const sub_group_mask &rhs) = default;

sub_group_mask &operator=(const sub_group_mask &rhs) = default;

template <typename Group>
friend std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group>,
sub_group_mask>
Expand Down Expand Up @@ -311,14 +285,6 @@ struct sub_group_mask {
}

private:
static size_t GetMaxLocalRangeSize() {
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_SubgroupMaxSize();
#else
return max_bits;
#endif
}

sub_group_mask(BitsType rhs, size_t bn)
: Bits(rhs & valuable_bits(bn)), bits_num(bn) {
assert(bits_num <= max_bits);
Expand All @@ -336,17 +302,15 @@ struct sub_group_mask {
};

template <typename Group>
std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group> ||
std::is_same_v<std::decay_t<Group>, sycl::sub_group>,
sub_group_mask>
std::enable_if_t<sycl::detail::is_sub_group<Group>::value, sub_group_mask>
group_ballot(Group g, bool predicate) {
(void)g;
#ifdef __SYCL_DEVICE_ONLY__
auto res = __spirv_GroupNonUniformBallot(
sycl::detail::spirv::group_scope<Group>::value, predicate);
sub_group_mask::BitsType val = res[0];
if constexpr (sizeof(sub_group_mask::BitsType) == 8)
val |= ((sub_group_mask::BitsType)res[1]) << 32;
BITS_TYPE val = res[0];
if constexpr (sizeof(BITS_TYPE) == 8)
val |= ((BITS_TYPE)res[1]) << 32;
return sycl::detail::Builder::createSubGroupMask<sub_group_mask>(
val, g.get_max_local_range()[0]);
#else
Expand All @@ -356,6 +320,8 @@ group_ballot(Group g, bool predicate) {
#endif
}

#undef BITS_TYPE

} // namespace ext::oneapi
} // namespace _V1
} // namespace sycl
1 change: 0 additions & 1 deletion sycl/include/syclcompat/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
#include <utility>

#include <sycl/builtins.hpp>
#include <sycl/ext/intel/experimental/usm_properties.hpp>
#include <sycl/ext/oneapi/group_local_memory.hpp>
#include <sycl/usm.hpp>

Expand Down
2 changes: 1 addition & 1 deletion sycl/source/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ inline namespace _V1 {
// TODO: Move these feature-test macros to compiler driver.
#define SYCL_EXT_INTEL_DEVICE_INFO 6
#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE 1
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 2
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
#define SYCL_EXT_ONEAPI_MATRIX 1
#define SYCL_EXT_ONEAPI_ASSERT 1
Expand Down
119 changes: 0 additions & 119 deletions sycl/test-e2e/SubGroupMask/sub_group_mask_ver2.cpp

This file was deleted.

2 changes: 1 addition & 1 deletion sycl/test/extensions/macro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ constexpr bool backend_opencl_macro_defined = true;
constexpr bool backend_opencl_macro_defined = false;
#endif

#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK
#if SYCL_EXT_ONEAPI_SUB_GROUP_MASK == 1
constexpr bool sub_group_mask_macro_defined = true;
#else
constexpr bool sub_group_mask_macro_defined = false;
Expand Down
3 changes: 2 additions & 1 deletion sycl/test/extensions/sub_group_mask.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only %s
//
// This test is intended to check sycl::ext::oneapi::sub_group_mask interface.
// test for spec ver.2: sycl/test-e2e/SubGroupMask/sub_group_mask_ver2.cpp
// There is a work in progress update to the spec: intel/llvm#8174
// TODO: udpate this test once revision 2 of the extension is supported

#include <sycl/sycl.hpp>

Expand Down

0 comments on commit fb8c82d

Please sign in to comment.