diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a43cd21..92b55b01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,8 @@ Documentation for hipCUB is available at * Added `ForEach`, `ForEachN`, `ForEachCopy`, `ForEachCopyN` and `Bulk` functions to have parity with CUB. * Added the `hipcub::CubVector` type for CUB parity. -## (Unreleased) hipCUB-3.3.0 for ROCm 6.3.0 - -### Fixed - -* Not all headers in hipCUB included `config.hpp` which could have resulted in build errors. - ### Added + * Add support for large indices in `hipcub::DeviceSegmentedReduce::*`. rocPRIM's backend provides support for all reduce variants, but CUB's does not have support yet for `DeviceSegmentedReduce::Arg*`, so large indices support has been excluded for these as well in hipCUB. * Add -t smoke option in rtest.py. It will run a subset of tests such that the total test time is in 5 minutes. Use python3 ./rtest.py --test smoke or python3 ./rtest.py -t smoke to execute smoke test. * Add inplace overloads of `DeviceScan` functions. @@ -27,7 +22,11 @@ Documentation for hipCUB is available at ### Changed * The NVIDIA backend now requires CUB, Thrust and libcu++ 2.4.0. If it is not found it will be downloaded from the NVIDIA CCCL repository. -## (Unreleased) hipCUB-3.2.0 for ROCm 6.2.0 +### Resolved issues + +* Not all headers in hipCUB included `config.hpp` which could have resulted in build errors. + +## hipCUB-3.2.0 for ROCm 6.2.0 ### Added * Add `DeviceCopy` function to have parity with CUB. diff --git a/hipcub/include/hipcub/backend/rocprim/util_type.hpp b/hipcub/include/hipcub/backend/rocprim/util_type.hpp index dd550e55..91e2ac5c 100644 --- a/hipcub/include/hipcub/backend/rocprim/util_type.hpp +++ b/hipcub/include/hipcub/backend/rocprim/util_type.hpp @@ -547,12 +547,12 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key; } static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key; } static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -596,12 +596,12 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -695,12 +695,14 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; + return key ^ mask; }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); + return key ^ mask; }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() { @@ -751,12 +753,12 @@ struct NumericTraits<__uint128_t> static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key; } static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key; } static __host__ __device__ __forceinline__ T Max() @@ -788,12 +790,12 @@ struct NumericTraits<__int128_t> static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ T Max() diff --git a/scripts/copyright-date/check-copyright.sh b/scripts/copyright-date/check-copyright.sh index 98b6d407..3de80af0 100755 --- a/scripts/copyright-date/check-copyright.sh +++ b/scripts/copyright-date/check-copyright.sh @@ -61,7 +61,6 @@ if $forkdiff; then source_commit="remotes/$remote/HEAD" # don't use fork-point for finding fork point (lol) - # see: https://stackoverflow.com/a/53981615 diff_hash="$(git merge-base "$source_commit" "$branch")" fi