From 15d4d3e7d94b651cd0cada3946dda32686baf5a0 Mon Sep 17 00:00:00 2001 From: fbusato Date: Thu, 2 Jan 2025 18:38:48 +0000 Subject: [PATCH] remove __detail namespace and minor fixes --- libcudacxx/include/cuda/std/__bit/clz.h | 33 +++++++++----------- libcudacxx/include/cuda/std/__bit/countl.h | 14 +++------ libcudacxx/include/cuda/std/__bit/countr.h | 11 ++----- libcudacxx/include/cuda/std/__bit/ctz.h | 27 +++++++--------- libcudacxx/include/cuda/std/__bit/integral.h | 11 ++----- libcudacxx/include/cuda/std/__bit/popc.h | 31 ++++++------------ libcudacxx/include/cuda/std/__bit/popcount.h | 7 +---- libcudacxx/include/cuda/std/__bit/rotate.h | 8 ++--- 8 files changed, 47 insertions(+), 95 deletions(-) diff --git a/libcudacxx/include/cuda/std/__bit/clz.h b/libcudacxx/include/cuda/std/__bit/clz.h index 27e2bf83def..16f98354e55 100644 --- a/libcudacxx/include/cuda/std/__bit/clz.h +++ b/libcudacxx/include/cuda/std/__bit/clz.h @@ -30,9 +30,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_clz(uint32_t __x) noexcept { for (int __i = 31; __i >= 0; --__i) @@ -57,7 +54,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_clz(uint64_t __x) noexcept return 64; } -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__clz(__x); @@ -65,7 +62,7 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x) unsigned long __where = 0; if (::_BitScanReverse32(&__where, __x)) { - return static_cast(31 - __where); + return 31 - static_cast(__where); } return 32; // Undefined Behavior. #else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv @@ -75,23 +72,23 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(uint32_t __x) #if _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz_msvc(uint64_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz_msvc(uint64_t __x) noexcept { unsigned long __where = 0; # if defined(_LIBCUDACXX_HAS_BITSCAN64) if (::_BitScanReverse64(&__where, __x)) { - return static_cast(63 - __where); + return 63 - static_cast(__where); } # else // Win32 doesn't have _BitScanReverse64 so emulate it with two 32 bit calls. if (::_BitScanReverse(&__where, static_cast(__x >> 32))) { - return static_cast(63 - (__where + 32)); + return 63 - (static_cast(__where) + 32); } if (::_BitScanReverse(&__where, static_cast(__x))) { - return static_cast(63 - __where); + return 63 - static_cast(__where); } # endif return 64; // Undefined Behavior. @@ -99,35 +96,33 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz_msvc(uint64_t __x) #endif // _CCCL_COMPILER(MSVC) -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_clz(uint64_t __x) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_clz(uint64_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__clzll(__x); #elif _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv - return __runtime_clz_msvc + return _CUDA_VSTD::__runtime_clz_msvc(__x); #else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv return ::__builtin_clzll(__x); #endif // !_CCCL_COMPILER(MSVC) ^^^ } -} // namespace __detail - -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint32_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint32_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_clz(__x); + return _CUDA_VSTD::__runtime_clz(__x); } - return _CUDA_VSTD::__detail::__constexpr_clz(__x); + return _CUDA_VSTD::__constexpr_clz(__x); } -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint64_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(uint64_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_clz(__x); + return _CUDA_VSTD::__runtime_clz(__x); } - return _CUDA_VSTD::__detail::__constexpr_clz(__x); + return _CUDA_VSTD::__constexpr_clz(__x); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/countl.h b/libcudacxx/include/cuda/std/__bit/countl.h index 145d66853b2..13c29835e70 100644 --- a/libcudacxx/include/cuda/std/__bit/countl.h +++ b/libcudacxx/include/cuda/std/__bit/countl.h @@ -32,9 +32,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - template _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countl_zero(_Tp __t) noexcept @@ -51,7 +48,7 @@ __countl_zero(_Tp __t) noexcept template _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countl_zero(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__countl_zero(static_cast(__t)) + return _CUDA_VSTD::__countl_zero(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); } @@ -68,27 +65,24 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) { if (__a.__array[__i]) { - return _CUDA_VSTD::__detail::__countl_zero(__a.__array[__i]) - + (_Ratio - 1 - __i) * numeric_limits::digits; + return _CUDA_VSTD::__countl_zero(__a.__array[__i]) + (_Ratio - 1 - __i) * numeric_limits::digits; } } return numeric_limits<_Tp>::digits; } -} // namespace __detail - template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> countl_zero(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__countl_zero(__t); + return _CUDA_VSTD::__countl_zero(__t); } template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> countl_one(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__countl_zero(static_cast<_Tp>(~__t)); + return _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t)); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/countr.h b/libcudacxx/include/cuda/std/__bit/countr.h index fc015a7afda..f6c1bdc4e90 100644 --- a/libcudacxx/include/cuda/std/__bit/countr.h +++ b/libcudacxx/include/cuda/std/__bit/countr.h @@ -32,9 +32,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - template _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countr_zero(_Tp __t) noexcept { @@ -60,26 +57,24 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) { if (__a.__array[__i]) { - return _CUDA_VSTD::__detail::__countr_zero(__a.__array[__i]) + __i * numeric_limits::digits; + return _CUDA_VSTD::__countr_zero(__a.__array[__i]) + __i * numeric_limits::digits; } } return numeric_limits<_Tp>::digits; } -} // namespace __detail - template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> countr_zero(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__countr_zero(__t); + return _CUDA_VSTD::__countr_zero(__t); } template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> countr_one(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__countr_zero(static_cast<_Tp>(~__t)); + return _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t)); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/ctz.h b/libcudacxx/include/cuda/std/__bit/ctz.h index 51bb3c5ff28..9ab79e59290 100644 --- a/libcudacxx/include/cuda/std/__bit/ctz.h +++ b/libcudacxx/include/cuda/std/__bit/ctz.h @@ -30,9 +30,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_ctz(uint32_t __x) noexcept { for (int __i = 0; __i < 32; ++__i) @@ -57,7 +54,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_ctz(uint64_t __x) noexcept return 64; } -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__clz(__brev(__x)); @@ -75,7 +72,7 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(uint32_t __x) #if _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x) noexcept { unsigned long __where = 0; # if defined(_LIBCUDACXX_HAS_BITSCAN64) && (defined(_M_AMD64) || defined(__x86_64__)) @@ -91,7 +88,7 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x) } if (::_BitScanForward(&__where, static_cast(__x >> 32))) { - return static_cast(__where + 32); + return static_cast(__where) + 32; } # endif return 64; @@ -99,35 +96,33 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz_msvc(uint64_t __x) #endif // _CCCL_COMPILER(MSVC) -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_ctz(uint64_t __x) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __runtime_ctz(uint64_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__clzll(__brevll(__x)); #elif _CCCL_COMPILER(MSVC) // _CCCL_COMPILER(MSVC) vvv - return __runtime_ctz_msvc + return _CUDA_VSTD::__runtime_ctz_msvc(__x); #else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv return ::__builtin_ctzll(__x); #endif // !_CCCL_COMPILER(MSVC) ^^^ } -} // namespace __detail - -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint32_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint32_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_ctz(__x); + return _CUDA_VSTD::__runtime_ctz(__x); } - return _CUDA_VSTD::__detail::__constexpr_ctz(__x); + return _CUDA_VSTD::__constexpr_ctz(__x); } -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint64_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(uint64_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_ctz(__x); + return _CUDA_VSTD::__runtime_ctz(__x); } - return _CUDA_VSTD::__detail::__constexpr_ctz(__x); + return _CUDA_VSTD::__constexpr_ctz(__x); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/integral.h b/libcudacxx/include/cuda/std/__bit/integral.h index 9acd289e632..01516f6e6f0 100644 --- a/libcudacxx/include/cuda/std/__bit/integral.h +++ b/libcudacxx/include/cuda/std/__bit/integral.h @@ -29,9 +29,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - template _LIBCUDACXX_HIDE_FROM_ABI constexpr uint32_t __bit_log2(_Tp __t) noexcept { @@ -53,27 +50,25 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t> (numeric_limits::digits - numeric_limits<_Tp>::digits)); } -} // namespace __detail - template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp> bit_floor(_Tp __t) noexcept { - return __t == 0 ? 0 : static_cast<_Tp>(_Tp{1} << _CUDA_VSTD::__detail::__bit_log2(__t)); + return __t == 0 ? 0 : static_cast<_Tp>(_Tp{1} << _CUDA_VSTD::__bit_log2(__t)); } template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp> bit_ceil(_Tp __t) noexcept { - return (__t < 2) ? 1 : static_cast<_Tp>(_CUDA_VSTD::__detail::__ceil2(__t)); + return (__t < 2) ? 1 : static_cast<_Tp>(_CUDA_VSTD::__ceil2(__t)); } template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> bit_width(_Tp __t) noexcept { - return __t == 0 ? 0 : static_cast(_CUDA_VSTD::__detail::__bit_log2(__t) + 1); + return __t == 0 ? 0 : static_cast(_CUDA_VSTD::__bit_log2(__t) + 1); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/popc.h b/libcudacxx/include/cuda/std/__bit/popc.h index 877c68e7d40..17d0f3946b5 100644 --- a/libcudacxx/include/cuda/std/__bit/popc.h +++ b/libcudacxx/include/cuda/std/__bit/popc.h @@ -29,12 +29,9 @@ #endif // _CCCL_COMPILER(MSVC) _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_popc(uint32_t __x) noexcept { -#if defined(__CUDA_ARCH__) || _CCCL_COMPILER(MSVC) // no device constexpr builtins int __count = 0; for (int __i = 0; __i < 32; ++__i) @@ -42,27 +39,19 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_popc(uint32_t __x) noexcept __count += (__x & (uint32_t{1} << __i)) ? 1 : 0; } return __count; -#else - return ::__builtin_popcount(__x); -#endif } _LIBCUDACXX_HIDE_FROM_ABI constexpr int __constexpr_popc(uint64_t __x) noexcept { -#if defined(__CUDA_ARCH__) || _CCCL_COMPILER(MSVC) - // no device constexpr builtins int __count = 0; for (int __i = 0; __i < 64; ++__i) { __count += (__x & (uint64_t{1} << __i)) ? 1 : 0; } return __count; -#else - return ::__builtin_popcountll(__x); -#endif } -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__popc(__x); @@ -75,37 +64,35 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint32_t __x) #endif // !_CCCL_COMPILER(MSVC) ^^^ } -_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint64_t __x) +_LIBCUDACXX_HIDE_FROM_ABI int __runtime_popc(uint64_t __x) noexcept { #if defined(__CUDA_ARCH__) return ::__popcll(__x); #elif _CCCL_COMPILER(MSVC) && !defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + X86 vvv return static_cast(::__popcnt64(__x)); -#elif _CCCL_COMPILER(MSVC) && defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + X86 vvv +#elif _CCCL_COMPILER(MSVC) && defined(_M_ARM64) // _CCCL_COMPILER(MSVC) + ARM64 vvv return static_cast(::_CountOneBits64(__x)); #else // _CCCL_COMPILER(MSVC) ^^^ / !_CCCL_COMPILER(MSVC) vvv return ::__builtin_popcountll(__x); #endif // !_CCCL_COMPILER(MSVC) ^^^ } -} // namespace __detail - -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint32_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint32_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_popc(__x); + return _CUDA_VSTD::__runtime_popc(__x); } - return _CUDA_VSTD::__detail::__constexpr_popc(__x); + return _CUDA_VSTD::__constexpr_popc(__x); } -_LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint64_t __x) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_popc(uint64_t __x) noexcept { if (!__cccl_default_is_constant_evaluated()) { - return _CUDA_VSTD::__detail::__runtime_popc(__x); + return _CUDA_VSTD::__runtime_popc(__x); } - return _CUDA_VSTD::__detail::__constexpr_popc(__x); + return _CUDA_VSTD::__constexpr_popc(__x); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/popcount.h b/libcudacxx/include/cuda/std/__bit/popcount.h index c8feab96fa5..c29b0bd8a02 100644 --- a/libcudacxx/include/cuda/std/__bit/popcount.h +++ b/libcudacxx/include/cuda/std/__bit/popcount.h @@ -29,9 +29,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ - template _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __popcount(_Tp __t) noexcept { @@ -52,13 +49,11 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) return __count; } -} // namespace __detail - template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> popcount(_Tp __t) noexcept { - return _CUDA_VSTD::__detail::__popcount(__t); + return _CUDA_VSTD::__popcount(__t); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/rotate.h b/libcudacxx/include/cuda/std/__bit/rotate.h index 5480432e30b..338ebb38b31 100644 --- a/libcudacxx/include/cuda/std/__bit/rotate.h +++ b/libcudacxx/include/cuda/std/__bit/rotate.h @@ -27,8 +27,6 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -namespace __detail -{ template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp __rotl(_Tp __t, unsigned int __cnt) noexcept { @@ -51,13 +49,11 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp __rotr(_Tp __t, unsigned : (__t >> (__cnt % __nlt::digits)) | (__t << (__nlt::digits - (__cnt % __nlt::digits))); } -} // namespace __detail - template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp> rotl(_Tp __t, unsigned int __cnt) noexcept { - return _CUDA_VSTD::__detail::__rotl(__t, __cnt); + return _CUDA_VSTD::__rotl(__t, __cnt); } // rotr @@ -65,7 +61,7 @@ template _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, _Tp> rotr(_Tp __t, unsigned int __cnt) noexcept { - return _CUDA_VSTD::__detail::__rotr(__t, __cnt); + return _CUDA_VSTD::__rotr(__t, __cnt); } _LIBCUDACXX_END_NAMESPACE_STD