diff --git a/main.cpp b/main.cpp
index 69e36cc..c5f111d 100644
--- a/main.cpp
+++ b/main.cpp
@@ -101,7 +101,7 @@ auto bench_pgm(const std::vector& data, const std::vector& q
     volatile uint64_t res = 0;
 
     std::cout << "Construct PGM index eps_l=" << Epsilon << " eps_i=" << EpsilonRecursive << std::endl;
-    pgm::PGMIndex index_branchless(data.begin(), data.end()-1);
+    pgm::PGMIndex index_branchless(data.begin(), data.end()-1); // branchless PGM without last-mile search
 
     auto start = std::chrono::high_resolution_clock::now();
@@ -131,8 +131,9 @@ auto bench_pgm(const std::vector& data, const std::vector& q
     // to avoid influence of hot cache
     std::vector data_cpy(data);
    std::vector queries_cpy(queries);
+    std::cout << "Construct PGM index eps_l=" << Epsilon << " eps_i=" << EpsilonRecursive << std::endl;
 
-    pgm::PGMIndex index(data_cpy.begin(), data_cpy.end()-1);
+    pgm::PGMIndex index(data_cpy.begin(), data_cpy.end()-1); // branchy PGM without last-mile search
 
     start = std::chrono::high_resolution_clock::now();
     for (auto q : queries_cpy) {
@@ -166,13 +167,18 @@ int main(int argc, const char * argv[]) {
 //    bench_search_repeat(20, 500, "/Users/liuqiyu/Desktop/bench_search_result_new.csv");
 //    exit(0);
 
-    const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/books_800M_uint64";
-    const size_t nq = 200;
+    const std::string fname = "/Users/liuqiyu/Desktop/SOSD_data/osm_cellids_800M_uint64";
+    const size_t nq = 500;
     const size_t repeat = 10;
 
     std::cout << "Load data from " << fname << std::endl;
-    auto data = benchmark::load_data(fname);
-    std::sort(data.begin(), data.end()-1);
+    auto data = benchmark::load_data(fname, true, 200000000);
+    std::sort(data.begin(), data.end());
+
+    auto data_stats = benchmark::get_data_stats(data);
+    std::cout << "mean: " << data_stats.mean
+              << " variance: " << data_stats.var
+              << " hardness ratio: " << data_stats.var/data_stats.mean << std::endl;
 
     std::vector> bench_results;
@@ -243,7 +249,7 @@ int main(int argc, const char * argv[]) {
     }
 
     // start from 7 cold cache config
-    std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_books_repeat_10_0250.csv");
+    std::ofstream ofs("/Users/liuqiyu/Desktop/bench_pgm_result_osm_repeat_10_2103.csv");
 
     ofs << "round,eps_l,eps_i,levels,lls,ils,latency_branchy_i,latency_branchy_l,latency_branchless_i,latency_branchless_l" << std::endl;
     for (auto br : bench_results) {
diff --git a/utils.h b/utils.h
index 6c6c230..a8e85de 100644
--- a/utils.h
+++ b/utils.h
@@ -22,8 +22,7 @@ static uint64_t timing(std::function fn) {
 
 // Loads values from binary file into vector.
 template
-static std::vector load_data(const std::string& filename,
-                             bool print = true) {
+static std::vector load_data(const std::string& filename, bool print = true, size_t sample_size = 0) {
     std::vector data;
     const uint64_t ns = timing([&] {
         std::ifstream in(filename, std::ios::binary);
@@ -46,8 +45,16 @@ static std::vector load_data(const std::string& filename,
                   << ms << " ms (" << static_cast(data.size()) / 1000 / ms
                   << " M values/s)" << std::endl;
     }
-
-    return data;
+
+    if (sample_size > 0) {
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::vector sample;
+        std::sample(data.begin(), data.end()-1, std::back_inserter(sample), sample_size, gen);
+        return sample;
+    } else {
+        return data;
+    }
 }
 
 template
@@ -58,8 +65,10 @@ auto get_data_stats(const std::vector& data) {
     }
     const auto n = gaps.size();
     double mean = std::accumulate(gaps.begin(), gaps.end(), 0.0)/n;
-    double sq_sum = std::inner_product(gaps.begin(), gaps.end(), gaps.begin(), 0.0);
-    double var = sq_sum/n - mean*mean;
+    double sq_sum = std::accumulate(gaps.begin(), gaps.end(), 0.0, [mean](double acc, double val) {
+        return acc + (val - mean) * (val - mean);
+    });
+    double var = sq_sum/n;
     struct data_stats {double mean; double var;};
     return data_stats {mean, var};
 }
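
A note on the get_data_stats change (illustration only, not part of the patch): the removed one-pass formula computes the gap variance as E[x^2] - mean^2, which is prone to catastrophic cancellation when the gaps are large, since E[x^2] can be so big that the subtraction wipes out all significant digits of the true variance; the two-pass sum of squared deviations that replaces it avoids that subtraction. The following self-contained sketch uses made-up toy gaps (values are hypothetical, not from the benchmark data) to show both formulas and the reported hardness ratio var/mean:

```cpp
// Sketch only: contrasts the old one-pass and new two-pass gap variance.
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    // Toy gaps with a large mean; the true variance is 32/3, about 10.67.
    std::vector<double> gaps = {1e9, 1e9 + 4, 1e9 + 8};
    const double n = gaps.size();
    const double mean = std::accumulate(gaps.begin(), gaps.end(), 0.0) / n;

    // Old one-pass formula: var = E[x^2] - mean^2. E[x^2] is about 1e18 here,
    // so the subtraction cancels and the result comes out as 0.
    const double sq_sum = std::inner_product(gaps.begin(), gaps.end(), gaps.begin(), 0.0);
    const double var_one_pass = sq_sum / n - mean * mean;

    // New two-pass formula: var = (1/n) * sum((x - mean)^2), evaluated after the
    // mean is known, so no huge intermediate values are subtracted from each other.
    const double ssd = std::accumulate(gaps.begin(), gaps.end(), 0.0,
        [mean](double acc, double v) { return acc + (v - mean) * (v - mean); });
    const double var_two_pass = ssd / n;

    std::cout << "one-pass variance: " << var_one_pass << "\n"
              << "two-pass variance: " << var_two_pass << "\n"
              << "hardness ratio (var/mean): " << var_two_pass / mean << std::endl;
}
```

On the load_data change: std::sample is C++17 and needs <algorithm> and <random>; with the vector iterators used here it preserves the relative order of the selected elements (the sample is then sorted in main.cpp anyway).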