Skip to content

Commit

Permalink
Benchmark data generation bug fix (#402)
Browse files Browse the repository at this point in the history
* Created generate_limits in the benchmark utils to generate correct ranges for custom types

* Removed lambda functions to fix a build error and resolve clang-format problems
  • Loading branch information
NB4444 authored Sep 27, 2024
1 parent c90f7cf commit f820c80
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 168 deletions.
30 changes: 12 additions & 18 deletions benchmark/benchmark_block_merge_sort.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -43,9 +43,8 @@ template<class T,
unsigned int ItemsPerThread,
class CompareOp,
unsigned int Trials>
__global__ __launch_bounds__(BlockSize) void sort_keys_kernel(const T* input,
T* output,
CompareOp compare_op)
__global__ __launch_bounds__(BlockSize)
void sort_keys_kernel(const T* input, T* output, CompareOp compare_op)
{
const unsigned int lid = hipThreadIdx_x;
const unsigned int block_offset = hipBlockIdx_x * ItemsPerThread * BlockSize;
Expand All @@ -68,9 +67,8 @@ template<class T,
unsigned int ItemsPerThread,
class CompareOp,
unsigned int Trials>
__global__ __launch_bounds__(BlockSize) void sort_pairs_kernel(const T* input,
T* output,
CompareOp compare_op)
__global__ __launch_bounds__(BlockSize)
void sort_pairs_kernel(const T* input, T* output, CompareOp compare_op)
{
const unsigned int lid = hipThreadIdx_x;
const unsigned int block_offset = hipBlockIdx_x * ItemsPerThread * BlockSize;
Expand Down Expand Up @@ -111,16 +109,11 @@ void run_benchmark(benchmark::State& state,
constexpr auto items_per_block = BlockSize * ItemsPerThread;
const auto size = items_per_block * ((N + items_per_block - 1) / items_per_block);

std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = benchmark_utils::get_random_data<T>(size, (T)-1000, (T) + 1000);
} else
{
input = benchmark_utils::get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
std::vector<T> input
= benchmark_utils::get_random_data<T>(size,
benchmark_utils::generate_limits<T>::min(),
benchmark_utils::generate_limits<T>::max());

T* d_input;
T* d_output;
HIP_CHECK(hipMalloc(&d_input, size * sizeof(T)));
Expand All @@ -143,7 +136,8 @@ void run_benchmark(benchmark::State& state,
d_input,
d_output,
CompareOp());
} else if(benchmark_kind == benchmark_kinds::sort_pairs)
}
else if(benchmark_kind == benchmark_kinds::sort_pairs)
{
hipLaunchKernelGGL(
HIP_KERNEL_NAME(sort_pairs_kernel<T, BlockSize, ItemsPerThread, CompareOp, Trials>),
Expand Down
19 changes: 6 additions & 13 deletions benchmark/benchmark_block_radix_rank.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -109,18 +109,11 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N)
constexpr unsigned int items_per_block = BlockSize * ItemsPerThread;
const unsigned int size = items_per_block * ((N + items_per_block - 1) / items_per_block);

std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = benchmark_utils::get_random_data<T>(size,
static_cast<T>(-1000),
static_cast<T>(1000));
} else
{
input = benchmark_utils::get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
std::vector<T> input
= benchmark_utils::get_random_data<T>(size,
benchmark_utils::generate_limits<T>::min(),
benchmark_utils::generate_limits<T>::max());

T* d_input;
int* d_output;
HIP_CHECK(hipMalloc(&d_input, size * sizeof(T)));
Expand Down
15 changes: 5 additions & 10 deletions benchmark/benchmark_block_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,11 @@ void run_benchmark(benchmark::State& state,
constexpr auto items_per_block = BlockSize * ItemsPerThread;
const auto size = items_per_block * ((N + items_per_block - 1) / items_per_block);

std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = benchmark_utils::get_random_data<T>(size, (T)-1000, (T) + 1000);
} else
{
input = benchmark_utils::get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
std::vector<T> input
= benchmark_utils::get_random_data<T>(size,
benchmark_utils::generate_limits<T>::min(),
benchmark_utils::generate_limits<T>::max());

T* d_input;
T* d_output;
HIP_CHECK(hipMalloc(&d_input, size * sizeof(T)));
Expand Down
15 changes: 5 additions & 10 deletions benchmark/benchmark_device_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,16 +268,11 @@ template<typename T,
void run_benchmark(benchmark::State& state, size_t size, const hipStream_t stream)
{
const size_t grid_size = size / (BlockSize * ItemsPerThread);
std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = benchmark_utils::get_random_data<T>(size, (T)-1000, (T) + 1000);
} else
{
input = benchmark_utils::get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
std::vector<T> input
= benchmark_utils::get_random_data<T>(size,
benchmark_utils::generate_limits<T>::min(),
benchmark_utils::generate_limits<T>::max());

T* d_input;
T* d_output;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&d_input), size * sizeof(T)));
Expand Down
48 changes: 24 additions & 24 deletions benchmark/benchmark_device_merge_sort.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,33 +33,28 @@ const size_t DEFAULT_N = 32 << 20;
const unsigned int batch_size = 10;
const unsigned int warmup_size = 5;

template<class Key>
std::vector<Key> generate_keys(size_t size)
template<class key_type>
struct CompareFunction
{
using key_type = Key;

if(std::is_floating_point<key_type>::value)
HIPCUB_DEVICE
inline constexpr bool
operator()(const key_type& a, const key_type& b)
{
return benchmark_utils::get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
size);
} else
{
return benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
size);
return a < b;
}
}
};

template<class Key>
void run_sort_keys_benchmark(benchmark::State& state, hipStream_t stream, size_t size)
{
using key_type = Key;
auto compare_function = [] __device__(const key_type& a, const key_type& b) { return a < b; };
using key_type = Key;

CompareFunction<key_type> compare_function;

auto keys_input = generate_keys<Key>(size);
std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

key_type* d_keys_input;
key_type* d_keys_output;
Expand Down Expand Up @@ -126,11 +121,16 @@ void run_sort_keys_benchmark(benchmark::State& state, hipStream_t stream, size_t
template<class Key, class Value>
void run_sort_pairs_benchmark(benchmark::State& state, hipStream_t stream, size_t size)
{
using key_type = Key;
using value_type = Value;
auto compare_function = [] __device__(const key_type& a, const key_type& b) { return a < b; };
using key_type = Key;
using value_type = Value;

CompareFunction<key_type> compare_function;

std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

auto keys_input = generate_keys<Key>(size);
std::vector<value_type> values_input(size);
for(size_t i = 0; i < size; i++)
{
Expand Down
17 changes: 4 additions & 13 deletions benchmark/benchmark_device_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,10 @@ std::vector<Key> generate_keys(size_t size)
{
using key_type = Key;

if(std::is_floating_point<key_type>::value)
{
return benchmark_utils::get_random_data<key_type>(size,
(key_type)-1000,
(key_type) + 1000,
size);
} else
{
return benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
size);
}
return benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());
}

template<bool Descending, class Key>
Expand Down
32 changes: 8 additions & 24 deletions benchmark/benchmark_device_segmented_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,10 @@ void run_sort_keys_benchmark(benchmark::State& state,
}
offsets.push_back(size);

std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input
= benchmark_utils::get_random_data<key_type>(size, (key_type)-1000, (key_type) + 1000);
} else
{
keys_input
= benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max());
}
std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

offset_type* d_offsets;
HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type)));
Expand Down Expand Up @@ -230,18 +222,10 @@ void run_sort_pairs_benchmark(benchmark::State& state,
}
offsets.push_back(size);

std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input
= benchmark_utils::get_random_data<key_type>(size, (key_type)-1000, (key_type) + 1000);
} else
{
keys_input
= benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max());
}
std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

std::vector<value_type> values_input(size);
std::iota(values_input.begin(), values_input.end(), 0);
Expand Down
34 changes: 8 additions & 26 deletions benchmark/benchmark_device_segmented_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,10 @@ void run_sort_keys_benchmark(benchmark::State& state,
}
offsets.push_back(size);

std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = benchmark_utils::get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000));
} else
{
keys_input
= benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max());
}
std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

offset_type* d_offsets;
HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type)));
Expand Down Expand Up @@ -229,19 +220,10 @@ void run_sort_pairs_benchmark(benchmark::State& state,
}
offsets.push_back(size);

std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = benchmark_utils::get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000));
} else
{
keys_input
= benchmark_utils::get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max());
}
std::vector<key_type> keys_input = benchmark_utils::get_random_data<key_type>(
size,
benchmark_utils::generate_limits<key_type>::min(),
benchmark_utils::generate_limits<key_type>::max());

std::vector<value_type> values_input(size);
std::iota(values_input.begin(), values_input.end(), 0);
Expand Down
35 changes: 19 additions & 16 deletions benchmark/benchmark_device_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,13 @@ void run_flagged_benchmark(benchmark::State& state,
const hipStream_t stream,
float true_probability)
{
std::vector<T> input;
std::vector<T> input
= benchmark_utils::get_random_data<T>(size,
benchmark_utils::generate_limits<T>::min(),
benchmark_utils::generate_limits<T>::max());

std::vector<FlagType> flags
= benchmark_utils::get_random_data01<FlagType>(size, true_probability);
if(std::is_floating_point<T>::value)
{
input = benchmark_utils::get_random_data<T>(size, T(-1000), T(1000));
} else
{
input = benchmark_utils::get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}

T* d_input;
FlagType* d_flags;
Expand Down Expand Up @@ -126,6 +121,19 @@ void run_flagged_benchmark(benchmark::State& state,
HIP_CHECK(hipDeviceSynchronize());
}

/// Unary selection predicate used by the select-op benchmark.
///
/// Stores a factor `true_probability` and accepts every value that is strictly
/// below the threshold `T(1000 * true_probability)`.
///
/// NOTE(review): run_selectop_benchmark requests its input between T(0) and
/// T(1000), so presumably about a `true_probability` fraction of the items is
/// selected -- confirm against the distribution produced by get_random_data.
template<class T>
struct SelectOperator
{
    /// Factor multiplied by 1000 to form the selection threshold.
    float true_probability;

    /// Stores the factor. Not marked `explicit`, so any existing implicit
    /// conversions from `float` keep compiling.
    SelectOperator(float true_probability_) : true_probability(true_probability_) {}

    /// Returns true when `value` is strictly less than
    /// `T(1000 * true_probability)`.
    ///
    /// Declared `const` because the predicate only reads its state; this lets
    /// it be invoked through a const copy or const reference as well.
    HIPCUB_DEVICE
    inline constexpr bool
        operator()(const T& value) const
    {
        return value < T(1000 * true_probability);
    }
};

template<class T>
void run_selectop_benchmark(benchmark::State& state,
size_t size,
Expand All @@ -134,12 +142,7 @@ void run_selectop_benchmark(benchmark::State& state,
{
std::vector<T> input = benchmark_utils::get_random_data<T>(size, T(0), T(1000));

auto select_op = [true_probability] __device__(const T& value) -> bool
{
if(value < T(1000 * true_probability))
return true;
return false;
};
SelectOperator<T> select_op(true_probability);

T* d_input;
T* d_output;
Expand Down
Loading

0 comments on commit f820c80

Please sign in to comment.