
Commit

add prefetch
qyliu-hkust committed Aug 25, 2024
1 parent e258919 commit a26f617
Showing 3 changed files with 38 additions and 24 deletions.
38 changes: 22 additions & 16 deletions main.cpp
@@ -23,35 +23,44 @@ auto bench_search(const size_t& n, const size_t& nq) {
std::sort(data.begin(), data.end());
auto queries = benchmark::gen_random_keys<uint64_t>(nq, std::numeric_limits<uint64_t>::max());

- volatile uint64_t res = 0;
- auto start = std::chrono::high_resolution_clock::now();
+ uint64_t res = 0;

+ size_t duration_linear = 0;
for (auto q : queries) {
+ auto start = std::chrono::high_resolution_clock::now();
res = *search::lower_bound_linear(data.begin(), data.end(), q);
+ auto end = std::chrono::high_resolution_clock::now();
+ duration_linear += std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
}
- auto end = std::chrono::high_resolution_clock::now();
- auto duration_linear = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();

std::cout << "Search result " << res << std::endl;
std::cout << "Query latency (linear) " << duration_linear / nq << std::endl;

std::vector<uint64_t> data_cpy1(data);
std::vector<uint64_t> queries_cpy1(queries);
- start = std::chrono::high_resolution_clock::now();

+ size_t duration_branchless = 0;
for (auto q : queries_cpy1) {
+ auto start = std::chrono::high_resolution_clock::now();
res = *search::upper_bound_branchless(data_cpy1.begin(), data_cpy1.end(), q);
+ auto end = std::chrono::high_resolution_clock::now();
+ duration_branchless += std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
}
- end = std::chrono::high_resolution_clock::now();
- auto duration_branchless = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();

std::cout << "Search result " << res << std::endl;
std::cout << "Query latency (branchless) " << duration_branchless / nq << std::endl;

std::vector<uint64_t> data_cpy2(data);
std::vector<uint64_t> queries_cpy2(queries);
- start = std::chrono::high_resolution_clock::now();

+ size_t duration_branchy = 0;
for (auto q : queries_cpy2) {
+ auto start = std::chrono::high_resolution_clock::now();
res = *std::lower_bound(data_cpy2.begin(), data_cpy2.end(), q);
+ auto end = std::chrono::high_resolution_clock::now();
+ duration_branchy += std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
}
- end = std::chrono::high_resolution_clock::now();
- auto duration_branchy = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();

std::cout << "Search result " << res << std::endl;
std::cout << "Query latency (branchy) " << duration_branchy / nq << std::endl;

@@ -178,10 +187,7 @@ auto bench_pgm(const std::vector<uint64_t>& data, const std::vector<uint64_t>& q


int main(int argc, const char * argv[]) {
- // bench_search_repeat(20, 500, "/Users/liuqiyu/Desktop/bench_search_result_new.csv");
- // exit(0);

- const std::string fname = argv[1];
+ const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/osm_cellids_800M_uint64";
const size_t nq = 500;
const size_t repeat = 10;

@@ -192,7 +198,7 @@ int main(int argc, const char * argv[]) {
auto data_stats = benchmark::get_data_stats(data);
std::cout << "mean: " << data_stats.mean
<< " variance: " << data_stats.var
<< " hardness ratio: " << data_stats.var/data_stats.mean << std::endl;
<< " hardness ratio: " << data_stats.var/(data_stats.mean*data_stats.mean) << std::endl;

std::vector<std::pair<size_t, stats>> bench_results;

@@ -293,7 +299,7 @@ int main(int argc, const char * argv[]) {
}

// start from 7 cold cache config
- std::ofstream ofs(argv[2]);
+ std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_fb_repeat_10_0824_1127.csv");
ofs << "round,eps_l,eps_i,levels,lls,ils,latency_branchy_i,latency_branchy_l,latency_branchless_i,latency_branchless_l" << std::endl;

for (auto br : bench_results) {
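Note on the main.cpp change: timing now brackets each individual lookup and accumulates the elapsed nanoseconds, so the printed figure is the total divided by the number of queries rather than one stopwatch around the whole loop. A minimal sketch of that measurement pattern follows; the time_queries helper, the volatile sink, and the sample data are illustrative and not part of the repository.

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <iostream>
#include <vector>

// Per-query latency measurement: time each lookup separately, accumulate
// nanoseconds, and report the mean per query.
template <typename SearchFn>
uint64_t time_queries(const std::vector<uint64_t>& data,
                      const std::vector<uint64_t>& queries,
                      SearchFn search) {
    volatile uint64_t sink = 0;  // keeps the lookups from being optimized away (a guard added here, not in the committed code)
    uint64_t total_ns = 0;
    for (auto q : queries) {
        auto start = std::chrono::high_resolution_clock::now();
        auto it = search(data.begin(), data.end(), q);
        sink = (it != data.end()) ? *it : 0;  // guard against end() for keys above the maximum
        auto end = std::chrono::high_resolution_clock::now();
        total_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
    }
    (void)sink;
    return total_ns / queries.size();  // mean latency per query in ns
}

int main() {
    std::vector<uint64_t> data = {1, 4, 9, 16, 25, 36, 49, 64};  // already sorted
    std::vector<uint64_t> queries = {3, 16, 50};
    auto ns = time_queries(data, queries,
        [](auto b, auto e, uint64_t k) { return std::lower_bound(b, e, k); });
    std::cout << "mean latency (ns): " << ns << std::endl;
    return 0;
}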
22 changes: 15 additions & 7 deletions search_algo.h
@@ -8,6 +8,8 @@
#ifndef search_algo_h
#define search_algo_h

+ #define IS_PREFETCH

#include <functional>

namespace search {
@@ -29,12 +31,18 @@ inline RandomIt lower_bound_branchless(RandomIt start, RandomIt end, const K& ke
auto n = std::distance(start, end);

while (n > 1) {
- const K half = n / 2;
- base = (base[half] < key) ? base + half : base; // w.r.t. cmov instruction
+ auto half = n / 2;
n -= half;
+
+ #ifdef IS_PREFETCH
+ __builtin_prefetch(&base[n / 2 - 1]);
+ __builtin_prefetch(&base[half + n / 2 - 1]);
+ #endif
+
+ base = (base[half - 1] < key) ? base + half : base; // w.r.t. cmov instruction
}

- return (key < *start) ? base : std::next(base);
+ return base;
}

template<typename RandomIt, typename K, size_t search_bound=64>
@@ -79,12 +87,12 @@ inline RandomIt upper_bound_branchless(RandomIt start, RandomIt end, const K& ke
auto n = std::distance(start, end);

while (n > 1) {
- const K half = n / 2;
- base = (base[half] <= key) ? base + half : base; // w.r.t. cmov instruction
+ auto half = n / 2;
n -= half;
+ base = (base[half-1] <= key) ? base + half : base; // w.r.t. cmov instruction
}

- return (*base <= key) ? std::next(base) : base;
+ return base;
}
}

@@ -93,7 +101,7 @@ void test_lower_bound(std::vector<K> data, std::vector<K> queries) {
std::sort(data.begin(), data.end());
bool flag = 1;
for (auto i=0; i<queries.size(); ++i) {
- auto res_lb_new = search::lower_bound_interpolation(data.begin(), data.end(), queries[i]);
+ auto res_lb_new = search::lower_bound_branchless(data.begin(), data.end(), queries[i]);
auto res_lb_std = std::lower_bound(data.begin(), data.end(), queries[i]);

if (*res_lb_new != *res_lb_std) {
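Note on the search_algo.h change: the branchless loop now probes base[half - 1] and, before that comparison resolves, prefetches the two elements the next iteration can touch: base[n/2 - 1] if base stays put, and base[half + n/2 - 1] if it advances by half. Below is a self-contained sketch pieced together from the hunks above; the function name and the small test in main are illustrative, and, like the committed version, it returns a position inside the range rather than end() when the key exceeds every element.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

#define IS_PREFETCH  // mirrors the macro added in search_algo.h

template <typename RandomIt, typename K>
inline RandomIt lower_bound_branchless_prefetch(RandomIt start, RandomIt end, const K& key) {
    RandomIt base = start;
    auto n = std::distance(start, end);
    while (n > 1) {
        auto half = n / 2;
        n -= half;  // n is now the size of the half that survives this round
#ifdef IS_PREFETCH
        // Prefetch both possible next probes; __builtin_prefetch is a GCC/Clang
        // hint and does not fault even if the address falls just outside the window.
        __builtin_prefetch(&base[n / 2 - 1]);
        __builtin_prefetch(&base[half + n / 2 - 1]);
#endif
        base = (base[half - 1] < key) ? base + half : base;  // branchless select (cmov)
    }
    return base;
}

int main() {
    std::vector<uint64_t> data = {2, 3, 5, 7, 11, 13, 17, 19};
    // Agrees with std::lower_bound for keys that do not exceed the largest element.
    for (uint64_t q : {1, 5, 6, 19}) {
        assert(lower_bound_branchless_prefetch(data.begin(), data.end(), q)
               == std::lower_bound(data.begin(), data.end(), q));
    }
    return 0;
}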
2 changes: 1 addition & 1 deletion utils.h
@@ -60,7 +60,7 @@ static std::vector<T> load_data(const std::string& filename, bool print = true,
template <typename K>
auto get_data_stats(const std::vector<K>& data) {
std::vector<K> gaps;
- for (auto i=1; i<data.size(); ++i) {
+ for (auto i=1; i<data.size()-1; ++i) {
gaps.emplace_back(data[i]-data[i-1]);
}
const auto n = gaps.size();
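Note on the stats changes: utils.h builds the vector of consecutive gaps that get_data_stats summarizes, and main.cpp now prints var / mean^2 (the squared coefficient of variation of those gaps) as the hardness ratio instead of var / mean. A compact illustration of that computation follows; the data and the exact loop bounds are illustrative, not the repository's get_data_stats.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<uint64_t> data = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29};  // assumed sorted

    // Gaps between consecutive keys.
    std::vector<uint64_t> gaps;
    for (size_t i = 1; i < data.size(); ++i) {
        gaps.push_back(data[i] - data[i - 1]);
    }

    double mean = 0.0;
    for (auto g : gaps) mean += static_cast<double>(g);
    mean /= gaps.size();

    double var = 0.0;
    for (auto g : gaps) var += (static_cast<double>(g) - mean) * (static_cast<double>(g) - mean);
    var /= gaps.size();

    // Hardness ratio as printed after this commit: variance / mean^2.
    std::cout << "mean: " << mean
              << " variance: " << var
              << " hardness ratio: " << var / (mean * mean) << std::endl;
    return 0;
}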
