Skip to content

Commit

Permalink
update query generation
Browse files Browse the repository at this point in the history
  • Loading branch information
qyliu-hkust committed Aug 23, 2024
1 parent 7ca4607 commit 80d0c5b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 10 deletions.
15 changes: 8 additions & 7 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@
#include <chrono>
#include <fstream>
#include <functional>
#include <limits>
#include "pgm_index.h"
#include "search_algo.h"
#include "utils.h"


auto bench_search(const size_t& n, const size_t& nq) {
std::cout << "====== n=" << n << " nq=" << nq << " ======" << std::endl;
auto data = benchmark::gen_random_keys<uint64_t>(n);
auto data = benchmark::gen_random_keys<uint64_t>(n, std::numeric_limits<uint64_t>::max());
std::sort(data.begin(), data.end());
auto queries = benchmark::gen_random_keys<uint64_t>(nq);
auto queries = benchmark::gen_random_keys<uint64_t>(nq, std::numeric_limits<uint64_t>::max());

volatile uint64_t res = 0;
auto start = std::chrono::high_resolution_clock::now();
Expand Down Expand Up @@ -165,20 +166,20 @@ int main(int argc, const char * argv[]) {
// bench_search_repeat(20, 500, "/Users/liuqiyu/Desktop/bench_search_result_new.csv");
// exit(0);

const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/fb_200M_uint64";
const size_t nq = 100;
const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/books_800M_uint64";
const size_t nq = 200;
const size_t repeat = 10;

std::cout << "Load data from " << fname << std::endl;
auto data = benchmark::load_data<uint64_t>(fname);
std::sort(data.begin(), data.end());
std::sort(data.begin(), data.end()-1);

std::vector<std::pair<size_t, stats>> bench_results;

for (auto i=0; i<repeat; ++i) {
std::cout << "Round " << i << std::endl;
std::cout << "Generate " << nq << " random search keys." << std::endl;
auto queries = benchmark::gen_random_keys<uint64_t>(nq);
auto queries = benchmark::gen_random_queries(data, 500);

bench_results.emplace_back(i, bench_pgm<4, 4>(data, queries));
bench_results.emplace_back(i, bench_pgm<8, 4>(data, queries));
Expand Down Expand Up @@ -242,7 +243,7 @@ int main(int argc, const char * argv[]) {
}

// start from 7 cold cache config
std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_new_repeat_10.csv");
std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_books_repeat_10_0250.csv");
ofs << "round,eps_l,eps_i,levels,lls,ils,latency_branchy_i,latency_branchy_l,latency_branchless_i,latency_branchless_l" << std::endl;

for (auto br : bench_results) {
Expand Down
37 changes: 34 additions & 3 deletions utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#ifndef utils_h
#define utils_h

#include <random>
#include <numeric>

namespace benchmark {
static uint64_t timing(std::function<void()> fn) {
Expand Down Expand Up @@ -48,13 +50,42 @@ static std::vector<T> load_data(const std::string& filename,
return data;
}

/// Computes the mean and (population) variance of the gaps between
/// consecutive keys, i.e. of data[i] - data[i-1] for i in [1, data.size()).
///
/// @param data  Keys to analyse. The subtraction is performed in type K, so
///              for unsigned K the keys are expected to be in non-decreasing
///              order; otherwise individual gaps wrap around.
/// @return      A struct with members `mean` and `var`. Both are 0.0 when
///              `data` has fewer than two keys (there are no gaps).
template <typename K>
auto get_data_stats(const std::vector<K>& data) {
    struct data_stats {double mean; double var;};

    // No gaps to summarise; avoid the 0/0 (NaN) a naive division would give.
    if (data.size() < 2) {
        return data_stats {0.0, 0.0};
    }

    // Welford's single-pass update. Each gap is converted to double *before*
    // it is squared, so large gaps (>= 2^32 for 64-bit keys) cannot overflow
    // the integer type, and no temporary vector of gaps is needed.
    double mean = 0.0;
    double m2 = 0.0;      // running sum of squared deviations from the mean
    size_t count = 0;
    for (size_t i = 1; i < data.size(); ++i) {
        const double gap = static_cast<double>(data[i] - data[i-1]);
        ++count;
        const double delta = gap - mean;
        mean += delta / static_cast<double>(count);
        m2 += delta * (gap - mean);
    }
    return data_stats {mean, m2 / static_cast<double>(count)};
}


/// Generates `n` keys drawn uniformly at random from the closed range [0, max].
///
/// @tparam K   Integer key type; it is used directly as the result type of
///             std::uniform_int_distribution.
/// @param n    Number of keys to generate.
/// @param max  Largest key value that may be produced (inclusive).
/// @return     A vector of `n` keys in generation order (not sorted, and
///             duplicates are possible).
template<typename K>
std::vector<K> gen_random_keys(const size_t& n, const K& max) {
    std::vector<K> data(n);
    // The engine is seeded from std::random_device on every call.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<K> dis(0, max);
    // Range-for avoids comparing a signed int index against data.size().
    for (auto& key : data) {
        key = dis(gen);
    }
    return data;
}

/// Draws up to `nq` query keys from `data` by sampling without replacement.
///
/// The last element of `data` is never sampled: the population is
/// [data.begin(), data.end()-1).
/// NOTE(review): presumably the final element is a sentinel; confirm against
/// the data loader.
///
/// @param data  Keys to sample from.
/// @param nq    Requested number of queries.
/// @return      min(nq, data.size()-1) keys, or an empty vector when `data`
///              is empty. std::sample keeps the selected elements in their
///              original relative order for vector iterators, so the result
///              is ordered like `data`.
template<typename K>
std::vector<K> gen_random_queries(const std::vector<K>& data, const size_t& nq) {
    std::vector<K> sample;
    // data.end()-1 is undefined for an empty vector, so bail out first.
    if (data.empty()) {
        return sample;
    }

    std::random_device rd;
    std::mt19937 gen(rd());
    const auto last = data.end() - 1;
    sample.reserve(std::min<size_t>(nq, data.size() - 1));
    std::sample(data.begin(), last, std::back_inserter(sample), nq, gen);
    return sample;
}

}


Expand Down

0 comments on commit 80d0c5b

Please sign in to comment.