Skip to content

Commit

Permalink
Merge back updates from the 6.3 release
Browse files Browse the repository at this point in the history
Merge back commits from release/rocm-rel-6.3 to develop.
  • Loading branch information
umfranzw committed Oct 29, 2024
2 parents 07e6201 + 7471ad0 commit 4bcc4d9
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 18 deletions.
13 changes: 6 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ Documentation for hipCUB is available at
* Added `ForEach`, `ForEachN`, `ForEachCopy`, `ForEachCopyN` and `Bulk` functions to have parity with CUB.
* Added the `hipcub::CubVector` type for CUB parity.

## (Unreleased) hipCUB-3.3.0 for ROCm 6.3.0

### Fixed

* Not all headers in hipCUB included `config.hpp` which could have resulted in build errors.

### Added

* Add support for large indices in `hipcub::DeviceSegmentedReduce::*`. rocPRIM's backend provides support for all reduce variants, but CUB's backend does not yet support `DeviceSegmentedReduce::Arg*`, so large-index support has been excluded for these variants in hipCUB as well.
* Add the `-t smoke` option to `rtest.py`. It runs a subset of tests such that the total test time is within 5 minutes. Use `python3 ./rtest.py --test smoke` or `python3 ./rtest.py -t smoke` to execute the smoke test.
* Add inplace overloads of `DeviceScan` functions.
Expand All @@ -27,7 +22,11 @@ Documentation for hipCUB is available at
### Changed
* The NVIDIA backend now requires CUB, Thrust and libcu++ 2.4.0. If they are not found, they will be downloaded from the NVIDIA CCCL repository.

## (Unreleased) hipCUB-3.2.0 for ROCm 6.2.0
### Resolved issues

* Not all headers in hipCUB included `config.hpp` which could have resulted in build errors.

## hipCUB-3.2.0 for ROCm 6.2.0

### Added
* Add `DeviceCopy` function to have parity with CUB.
Expand Down
22 changes: 12 additions & 10 deletions hipcub/include/hipcub/backend/rocprim/util_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,12 +547,12 @@ struct BaseTraits<UNSIGNED_INTEGER, true, false, _UnsignedBits, T>

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
{
return key_codec::encode(rocprim::detail::bit_cast<T>(key));
return key;
}

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
{
return key_codec::decode(rocprim::detail::bit_cast<T>(key));
return key;
}

static HIPCUB_HOST_DEVICE __forceinline__ T Max()
Expand Down Expand Up @@ -596,12 +596,12 @@ struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
{
return key_codec::encode(rocprim::detail::bit_cast<T>(key));
return key ^ HIGH_BIT;
};

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
{
return key_codec::decode(rocprim::detail::bit_cast<T>(key));
return key ^ HIGH_BIT;
};

static HIPCUB_HOST_DEVICE __forceinline__ T Max()
Expand Down Expand Up @@ -695,12 +695,14 @@ struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
{
return key_codec::encode(rocprim::detail::bit_cast<T>(key));
UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT;
return key ^ mask;
};

static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
{
return key_codec::decode(rocprim::detail::bit_cast<T>(key));
UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1);
return key ^ mask;
};

static HIPCUB_HOST_DEVICE __forceinline__ T Max() {
Expand Down Expand Up @@ -751,12 +753,12 @@ struct NumericTraits<__uint128_t>

static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
{
return key_codec::encode(rocprim::detail::bit_cast<T>(key));
return key;
}

static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
{
return key_codec::decode(rocprim::detail::bit_cast<T>(key));
return key;
}

static __host__ __device__ __forceinline__ T Max()
Expand Down Expand Up @@ -788,12 +790,12 @@ struct NumericTraits<__int128_t>

static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
{
return key_codec::encode(rocprim::detail::bit_cast<T>(key));
return key ^ HIGH_BIT;
};

static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
{
return key_codec::decode(rocprim::detail::bit_cast<T>(key));
return key ^ HIGH_BIT;
};

static __host__ __device__ __forceinline__ T Max()
Expand Down
1 change: 0 additions & 1 deletion scripts/copyright-date/check-copyright.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ if $forkdiff; then
source_commit="remotes/$remote/HEAD"

# don't use fork-point for finding fork point (lol)
# see: https://stackoverflow.com/a/53981615
diff_hash="$(git merge-base "$source_commit" "$branch")"
fi

Expand Down

0 comments on commit 4bcc4d9

Please sign in to comment.