From 80d0c5b233097129f2d11277d71e4aa148fa25a1 Mon Sep 17 00:00:00 2001 From: Liu Qiyu Date: Fri, 23 Aug 2024 13:25:43 +0800 Subject: [PATCH] update query generation --- main.cpp | 15 ++++++++------- utils.h | 37 ++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/main.cpp b/main.cpp index bafc0a6..69e36cc 100644 --- a/main.cpp +++ b/main.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "pgm_index.h" #include "search_algo.h" #include "utils.h" @@ -18,9 +19,9 @@ auto bench_search(const size_t& n, const size_t& nq) { std::cout << "====== n=" << n << " nq=" << nq << " ======" << std::endl; - auto data = benchmark::gen_random_keys(n); + auto data = benchmark::gen_random_keys(n, std::numeric_limits::max()); std::sort(data.begin(), data.end()); - auto queries = benchmark::gen_random_keys(nq); + auto queries = benchmark::gen_random_keys(nq, std::numeric_limits::max()); volatile uint64_t res = 0; auto start = std::chrono::high_resolution_clock::now(); @@ -165,20 +166,20 @@ int main(int argc, const char * argv[]) { // bench_search_repeat(20, 500, "/Users/liuqiyu/Desktop/bench_search_result_new.csv"); // exit(0); - const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/fb_200M_uint64"; - const size_t nq = 100; + const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/books_800M_uint64"; + const size_t nq = 200; const size_t repeat = 10; std::cout << "Load data from " << fname << std::endl; auto data = benchmark::load_data(fname); - std::sort(data.begin(), data.end()); + std::sort(data.begin(), data.end()-1); std::vector> bench_results; for (auto i=0; i(nq); + auto queries = benchmark::gen_random_queries(data, 500); bench_results.emplace_back(i, bench_pgm<4, 4>(data, queries)); bench_results.emplace_back(i, bench_pgm<8, 4>(data, queries)); @@ -242,7 +243,7 @@ int main(int argc, const char * argv[]) { } // start from 7 cold cache config - std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_new_repeat_10.csv"); + std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_books_repeat_10_0250.csv"); ofs << "round,eps_l,eps_i,levels,lls,ils,latency_branchy_i,latency_branchy_l,latency_branchless_i,latency_branchless_l" << std::endl; for (auto br : bench_results) { diff --git a/utils.h b/utils.h index 48b6731..6c6c230 100644 --- a/utils.h +++ b/utils.h @@ -8,6 +8,8 @@ #ifndef utils_h #define utils_h +#include +#include namespace benchmark { static uint64_t timing(std::function fn) { @@ -48,13 +50,42 @@ static std::vector load_data(const std::string& filename, return data; } +template +auto get_data_stats(const std::vector& data) { + std::vector gaps; + for (auto i=1; i -std::vector gen_random_keys(const size_t& n) { - std::vector data(n); - std::generate(data.begin(), data.end(), std::rand); +std::vector gen_random_keys(const size_t& n, const K& max) { + std::vector data(n); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, max); + for (auto i=0; i +std::vector gen_random_queries(const std::vector& data, const size_t& nq) { + std::random_device rd; + std::mt19937 gen(rd()); + std::vector sample; + std::sample(data.begin(), data.end()-1, std::back_inserter(sample), nq, gen); + return sample; +} + }