Skip to content

Commit

Permalink
remove __detail namespace and minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato committed Jan 2, 2025
1 parent 492486b commit 15d4d3e
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 95 deletions.
33 changes: 14 additions & 19 deletions libcudacxx/include/cuda/std/__bit/clz.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

namespace __detail
{

_LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_clz(uint32_t __x) noexcept
{
for (int __i = 31; __i >= 0; --__i)
Expand All @@ -57,15 +54,15 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_clz(uint64_t __x) noexcept
return 64;
}

_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__clz(__x);
#elif _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv
unsigned long __where = 0;
if (::_BitScanReverse32(&__where, __x))
{
return static_cast<int>(31 - __where);
return 31 - static_cast<int>(__where);
}
return 32; // Undefined Behavior.
#else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv
Expand All @@ -75,59 +72,57 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x)

#if _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv

// MSVC-only helper (this span sits inside `#if _CCCL_COMPILER(MSVC)`): counts the leading zero bits of a
// 64-bit value with the _BitScanReverse family. Each successful scan returns 63 minus the bit index stored in
// __where; if no scan succeeds the function returns 64.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form.
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz_msvc(uint64_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz_msvc(uint64_t __x) noexcept
{
unsigned long __where = 0;
# if defined(_LIBCUDACXX_HAS_BITSCAN64)
if (::_BitScanReverse64(&__where, __x))
{
return static_cast<int>(63 - __where);
// The second form converts __where to int first, so the subtraction is done in int instead of unsigned long.
return 63 - static_cast<int>(__where);
}
# else
// Win32 doesn't have _BitScanReverse64 so emulate it with two 32 bit calls.
// The upper 32 bits are scanned first; an index found there is offset by 32 before being subtracted from 63.
if (::_BitScanReverse(&__where, static_cast<uint32_t>(__x >> 32)))
{
return static_cast<int>(63 - (__where + 32));
return 63 - (static_cast<int>(__where) + 32);
}
// Only reached when the scan of the upper half reported nothing: scan the lower 32 bits.
if (::_BitScanReverse(&__where, static_cast<uint32_t>(__x)))
{
return static_cast<int>(63 - __where);
return 63 - static_cast<int>(__where);
}
# endif
return 64; // Undefined Behavior.
}

#endif // _CCCL_COMPILER(MSVC)

// 64-bit leading-zero count, with the implementation chosen by the preprocessor: ::__clzll when __CUDA_ARCH__
// is defined, the __runtime_clz_msvc helper under MSVC, and ::__builtin_clzll otherwise.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form.
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_clz(uint64_t __x)
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_clz(uint64_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__clzll(__x);
#elif _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv
// NOTE(review): the first return line below has no argument list or terminating semicolon in this capture;
// it is kept exactly as captured. The second line is the complete, _CUDA_VSTD-qualified call.
return __runtime_clz_msvc
return _CUDA_VSTD::__runtime_clz_msvc(__x);
#else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv
return ::__builtin_clzll(__x);
#endif // !_CCCL_COMPILER(MSVC) ^^^
}

} // namespace __detail

// 32-bit count-leading-zeros entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_clz; otherwise it falls through to __constexpr_clz.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint32_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint32_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_clz(__x);
return _CUDA_VSTD::__runtime_clz(__x);
}
return _CUDA_VSTD::__detail::__constexpr_clz(__x);
return _CUDA_VSTD::__constexpr_clz(__x);
}

// 64-bit count-leading-zeros entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_clz; otherwise it falls through to __constexpr_clz.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint64_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint64_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_clz(__x);
return _CUDA_VSTD::__runtime_clz(__x);
}
return _CUDA_VSTD::__detail::__constexpr_clz(__x);
return _CUDA_VSTD::__constexpr_clz(__x);
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
14 changes: 4 additions & 10 deletions libcudacxx/include/cuda/std/__bit/countl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

namespace __detail
{

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) == sizeof(uint32_t) || sizeof(_Tp) == sizeof(uint64_t), int>
__countl_zero(_Tp __t) noexcept
Expand All @@ -51,7 +48,7 @@ __countl_zero(_Tp __t) noexcept
// __countl_zero overload enabled when sizeof(_Tp) < sizeof(uint32_t). The value is widened to uint32_t,
// counted by the __countl_zero overload that accepts it, and the difference in digit counts between
// uint32_t and _Tp is subtracted from the result.
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed and added
// forms of the same statement, and the `- (...)` line is the continuation shared by both.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) < sizeof(uint32_t), int> __countl_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__detail::__countl_zero(static_cast<uint32_t>(__t))
return _CUDA_VSTD::__countl_zero(static_cast<uint32_t>(__t))
- (numeric_limits<uint32_t>::digits - numeric_limits<_Tp>::digits);
}

Expand All @@ -68,27 +65,24 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t))
{
if (__a.__array[__i])
{
return _CUDA_VSTD::__detail::__countl_zero(__a.__array[__i])
+ (_Ratio - 1 - __i) * numeric_limits<uint64_t>::digits;
return _CUDA_VSTD::__countl_zero(__a.__array[__i]) + (_Ratio - 1 - __i) * numeric_limits<uint64_t>::digits;
}
}
return numeric_limits<_Tp>::digits;
}

} // namespace __detail

// Public countl_zero: participates in overload resolution only when __cccl_is_unsigned_integer<_Tp>::value
// is true, and returns the int produced by __countl_zero(__t).
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countl_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__detail::__countl_zero(__t);
return _CUDA_VSTD::__countl_zero(__t);
}

// Public countl_one: enabled only when __cccl_is_unsigned_integer<_Tp>::value is true. It complements __t
// and returns __countl_zero of the complement. The static_cast converts the result of ~__t back to _Tp
// before counting (for types narrower than int, ~ yields a promoted int).
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countl_one(_Tp __t) noexcept
{
return _CUDA_VSTD::__detail::__countl_zero(static_cast<_Tp>(~__t));
return _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t));
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
11 changes: 3 additions & 8 deletions libcudacxx/include/cuda/std/__bit/countr.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

namespace __detail
{

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) <= sizeof(uint64_t), int> __countr_zero(_Tp __t) noexcept
{
Expand All @@ -60,26 +57,24 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t))
{
if (__a.__array[__i])
{
return _CUDA_VSTD::__detail::__countr_zero(__a.__array[__i]) + __i * numeric_limits<uint64_t>::digits;
return _CUDA_VSTD::__countr_zero(__a.__array[__i]) + __i * numeric_limits<uint64_t>::digits;
}
}
return numeric_limits<_Tp>::digits;
}

} // namespace __detail

// Public countr_zero: participates in overload resolution only when __cccl_is_unsigned_integer<_Tp>::value
// is true, and returns the int produced by __countr_zero(__t).
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countr_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__detail::__countr_zero(__t);
return _CUDA_VSTD::__countr_zero(__t);
}

// Public countr_one: enabled only when __cccl_is_unsigned_integer<_Tp>::value is true. It complements __t
// and returns __countr_zero of the complement. The static_cast converts the result of ~__t back to _Tp
// before counting (for types narrower than int, ~ yields a promoted int).
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countr_one(_Tp __t) noexcept
{
return _CUDA_VSTD::__detail::__countr_zero(static_cast<_Tp>(~__t));
return _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t));
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
27 changes: 11 additions & 16 deletions libcudacxx/include/cuda/std/__bit/ctz.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

namespace __detail
{

_LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_ctz(uint32_t __x) noexcept
{
for (int __i = 0; __i < 32; ++__i)
Expand All @@ -57,7 +54,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_ctz(uint64_t __x) noexcept
return 64;
}

_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__clz(__brev(__x));
Expand All @@ -75,7 +72,7 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x)

#if _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv

_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x) noexcept
{
unsigned long __where = 0;
# if defined(_LIBCUDACXX_HAS_BITSCAN64) && (defined(_M_AMD64) || defined(__x86_64__))
Expand All @@ -91,43 +88,41 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x)
}
if (::_BitScanForward(&__where, static_cast<uint32_t>(__x >> 32)))
{
return static_cast<int>(__where + 32);
return static_cast<int>(__where) + 32;
}
# endif
return 64;
}

#endif // _CCCL_COMPILER(MSVC)

// 64-bit trailing-zero count, with the implementation chosen by the preprocessor: ::__clzll applied to
// ::__brevll(__x) when __CUDA_ARCH__ is defined, the __runtime_ctz_msvc helper under MSVC, and
// ::__builtin_ctzll otherwise.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form.
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_ctz(uint64_t __x)
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_ctz(uint64_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__clzll(__brevll(__x));
#elif _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv
// NOTE(review): the first return line below has no argument list or terminating semicolon in this capture;
// it is kept exactly as captured. The second line is the complete, _CUDA_VSTD-qualified call.
return __runtime_ctz_msvc
return _CUDA_VSTD::__runtime_ctz_msvc(__x);
#else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv
return ::__builtin_ctzll(__x);
#endif // !_CCCL_COMPILER(MSVC) ^^^
}

} // namespace __detail

// 32-bit count-trailing-zeros entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_ctz; otherwise it falls through to __constexpr_ctz.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint32_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint32_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_ctz(__x);
return _CUDA_VSTD::__runtime_ctz(__x);
}
return _CUDA_VSTD::__detail::__constexpr_ctz(__x);
return _CUDA_VSTD::__constexpr_ctz(__x);
}

// 64-bit count-trailing-zeros entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_ctz; otherwise it falls through to __constexpr_ctz.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint64_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint64_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_ctz(__x);
return _CUDA_VSTD::__runtime_ctz(__x);
}
return _CUDA_VSTD::__detail::__constexpr_ctz(__x);
return _CUDA_VSTD::__constexpr_ctz(__x);
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
11 changes: 3 additions & 8 deletions libcudacxx/include/cuda/std/__bit/integral.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

namespace __detail
{

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr uint32_t __bit_log2(_Tp __t) noexcept
{
Expand All @@ -53,27 +50,25 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) < sizeof(uint32_t),
>> (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits));
}

} // namespace __detail

// Public bit_floor: enabled only when __cccl_is_unsigned_integer<_Tp>::value is true. Returns 0 when
// __t == 0; otherwise returns _Tp{1} shifted left by __bit_log2(__t), cast back to _Tp.
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp>
bit_floor(_Tp __t) noexcept
{
return __t == 0 ? 0 : static_cast<_Tp>(_Tp{1} << _CUDA_VSTD::__detail::__bit_log2(__t));
return __t == 0 ? 0 : static_cast<_Tp>(_Tp{1} << _CUDA_VSTD::__bit_log2(__t));
}

// Public bit_ceil: enabled only when __cccl_is_unsigned_integer<_Tp>::value is true. Returns 1 when
// __t < 2; otherwise returns the result of __ceil2(__t) cast to _Tp.
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp>
bit_ceil(_Tp __t) noexcept
{
return (__t < 2) ? 1 : static_cast<_Tp>(_CUDA_VSTD::__detail::__ceil2(__t));
return (__t < 2) ? 1 : static_cast<_Tp>(_CUDA_VSTD::__ceil2(__t));
}

// Public bit_width: enabled only when __cccl_is_unsigned_integer<_Tp>::value is true. Returns 0 when
// __t == 0; otherwise returns __bit_log2(__t) + 1 converted to int.
// NOTE(review): this text is a rendered diff; the two `return` lines appear to be the removed
// (`__detail::`-qualified) and added forms of the same statement.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
bit_width(_Tp __t) noexcept
{
return __t == 0 ? 0 : static_cast<int>(_CUDA_VSTD::__detail::__bit_log2(__t) + 1);
return __t == 0 ? 0 : static_cast<int>(_CUDA_VSTD::__bit_log2(__t) + 1);
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
31 changes: 9 additions & 22 deletions libcudacxx/include/cuda/std/__bit/popc.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,40 +29,29 @@
#endif // _CCCL_COMPILER(MSVC)

_LIBCUDACXX_BEGIN_NAMESPACE_STD
namespace __detail
{

// constexpr population count for 32-bit values. As written here, the loop tests each of the 32 bit
// positions and adds 1 per set bit when __CUDA_ARCH__ is defined or the compiler is MSVC; the #else
// branch returns ::__builtin_popcount(__x) instead.
// NOTE(review): this text is a rendered diff without +/- markers; presumably the preprocessor lines and the
// builtin return are the removed side, leaving the loop as the only implementation — confirm against the commit.
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_popc(uint32_t __x) noexcept
{
#if defined(__CUDA_ARCH__) || _CCCL_COMPILER(MSVC)
// no device constexpr builtins
int __count = 0;
for (int __i = 0; __i < 32; ++__i)
{
__count += (__x & (uint32_t{1} << __i)) ? 1 : 0;
}
return __count;
#else
return ::__builtin_popcount(__x);
#endif
}

// constexpr population count for 64-bit values. As written here, the loop tests each of the 64 bit
// positions and adds 1 per set bit when __CUDA_ARCH__ is defined or the compiler is MSVC; the #else
// branch returns ::__builtin_popcountll(__x) instead.
// NOTE(review): this text is a rendered diff without +/- markers; presumably the preprocessor lines and the
// builtin return are the removed side, leaving the loop as the only implementation — confirm against the commit.
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_popc(uint64_t __x) noexcept
{
#if defined(__CUDA_ARCH__) || _CCCL_COMPILER(MSVC)
// no device constexpr builtins
int __count = 0;
for (int __i = 0; __i < 64; ++__i)
{
__count += (__x & (uint64_t{1} << __i)) ? 1 : 0;
}
return __count;
#else
return ::__builtin_popcountll(__x);
#endif
}

_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__popc(__x);
Expand All @@ -75,37 +64,35 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x)
#endif // !_CCCL_COMPILER(MSVC) ^^^
}

// 64-bit population count, with the implementation chosen by the preprocessor: ::__popcll when
// __CUDA_ARCH__ is defined, ::__popcnt64 under MSVC when _M_ARM64 is not defined, ::_CountOneBits64 under
// MSVC when _M_ARM64 is defined, and ::__builtin_popcountll otherwise. The MSVC results are cast to int.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form. The two `_M_ARM64` #elif lines differ only
// in their trailing comment (X86 -> ARM64).
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint64_t __x)
_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint64_t __x) noexcept
{
#if defined(__CUDA_ARCH__)
return ::__popcll(__x);
#elif _CCCL_COMPILER(MSVC) && !defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + X86 vvv
return static_cast<int>(::__popcnt64(__x));
#elif _CCCL_COMPILER(MSVC) && defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + X86 vvv
#elif _CCCL_COMPILER(MSVC) && defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + ARM64 vvv
return static_cast<int>(::_CountOneBits64(__x));
#else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv
return ::__builtin_popcountll(__x);
#endif // !_CCCL_COMPILER(MSVC) ^^^
}

} // namespace __detail

// 32-bit population-count entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_popc; otherwise it falls through to __constexpr_popc.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint32_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint32_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_popc(__x);
return _CUDA_VSTD::__runtime_popc(__x);
}
return _CUDA_VSTD::__detail::__constexpr_popc(__x);
return _CUDA_VSTD::__constexpr_popc(__x);
}

// 64-bit population-count entry point. When __cccl_default_is_constant_evaluated() returns false it
// forwards to __runtime_popc; otherwise it falls through to __constexpr_popc.
// NOTE(review): this text is a rendered diff; in each back-to-back pair of near-identical lines the first
// appears to be the removed form and the second the added form (adds _CCCL_NODISCARD, drops `__detail::`).
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint64_t __x) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint64_t __x) noexcept
{
if (!__cccl_default_is_constant_evaluated())
{
return _CUDA_VSTD::__detail::__runtime_popc(__x);
return _CUDA_VSTD::__runtime_popc(__x);
}
return _CUDA_VSTD::__detail::__constexpr_popc(__x);
return _CUDA_VSTD::__constexpr_popc(__x);
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
Loading

0 comments on commit 15d4d3e

Please sign in to comment.