From c99cd82c61879503e99e47b1953c6ba5ee32cfdb Mon Sep 17 00:00:00 2001 From: Yenaled Date: Sun, 22 Jan 2023 13:38:40 -0800 Subject: [PATCH 01/49] Clean up bustools count Also added count split option (but not implemented yet) --- src/Common.hpp | 1 + src/bustools_count.cpp | 381 ++++++++++++----------------------------- src/bustools_main.cpp | 10 ++ 3 files changed, 123 insertions(+), 269 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index f67ee5c..e6d13c7 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -62,6 +62,7 @@ struct Bustools_opt std::string count_genes; std::string count_ecs; std::string count_txp; + std::string count_split; bool count_em = false; bool count_cm = false; bool count_collapse = false; diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e5961e5..244ba45 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -22,7 +22,11 @@ void bustools_count(Bustools_opt &opt) { std::vector> ecmap; std::unordered_map txnames; + auto txnames_split = txnames; // copy parseTranscripts(opt.count_txp, txnames); + if (!opt.count_split.empty()) { + parseTranscripts(opt.count_split, txnames_split); // subset of txnames + } std::vector genemap(txnames.size(), -1); std::unordered_map genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); @@ -437,253 +441,8 @@ void bustools_count(Bustools_opt &opt) { of << n_rows << " " << (x+1) << " " << val << "\n"; } }; - - for (const auto& infn : opt.files) { - std::streambuf *inbuf; - std::ifstream inf; - if (!opt.stream_in) { - inf.open(infn.c_str(), std::ios::binary); - inbuf = inf.rdbuf(); - } else { - inbuf = std::cin.rdbuf(); - } - std::istream in(inbuf); - - parseHeader(in, h); - bclen = h.bclen; - - int rc = 0; - while (true) { - in.read((char*)p, N*sizeof(BUSData)); - size_t rc = in.gcount() / sizeof(BUSData); - nr += rc; - if (rc == 0) { - break; - } - - - for (size_t i = 0; i < rc; i++) { - if (p[i].barcode != current_bc) { - // output whatever is in v - if (!v.empty()) { - if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } - } - v.clear(); - current_bc = p[i].barcode; - } - v.push_back(p[i]); - - } - } - if (!v.empty()) { - if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } - } - - if (!opt.stream_in) { - inf.close(); - } - } - delete[] p; p = nullptr; - - if (!opt.count_collapse) { - n_cols = ecmap.size(); - } else { - n_cols = genenames.size(); - } - - of.close(); - //Rewrite header in a way that works for both Windows and Linux - std::stringstream ss; - ss << n_rows << " " << n_cols << " " << n_entries; - std::string header = ss.str(); - int hlen = header.size(); - header = header + std::string(66 - hlen, ' ') + '\n'; - of.open(mtx_ofn, std::ios::in | std::ios::out); - of << headerComments << header; - of.close(); - - // write updated ec file - h.ecs = std::move(ecmap); - if (!opt.count_collapse) { - writeECs(ec_ofn, h); - } else { - writeGenes(gene_ofn, genenames); - } - // write barcode file - std::ofstream bcof; - bcof.open(barcodes_ofn); - for (const auto &x : barcodes) { - bcof << binaryToString(x, bclen) << "\n"; - } - bcof.close(); - - //write histogram file - if (opt.count_gen_hist) { - std::ofstream histof; - histof.open(hist_ofn); - - for (size_t g = 0; g < genenames.size(); ++g) { - //Indexed as gene*histmax + histIndex - unsigned int offs = g * histmax; - - //first figure out the length of the histogram, don't write that to make the file smaller - unsigned int 
histEnd = histmax - 1; - for (; histEnd != 0; --histEnd) { - if (histograms[offs + histEnd] != 0) { - break; - } - } - for (size_t c = 0; c <= histEnd; ++c) { - if (c != 0) { - histof << '\t'; - } - histof << histograms[offs + c]; - } - - histof << "\n"; - } - histof.close(); - } - - if (opt.count_gen_hist) { - //write mean counts per UMI file (per gene) - - std::ofstream cuof; - cuof.open(cu_ofn); - //write header - cuof << "gene\tCU\tUMIs\n"; - - //prepare gene names for writing - std::vector names; - names.resize(genenames.size()); - for (const auto &x : genenames) { - if (x.second >= 0) { - names[x.second] = x.first; - } - } - - - for (size_t g = 0; g < genenames.size(); ++g) { - //Indexed as gene*histmax + histIndex - unsigned int offs = g * histmax; - - //calculate counts per UMI as the mean of the histogram - double wsum = 0; - double sum = 0; - for (size_t c = 0; c < histmax; ++c) { - wsum += double(c+1) * histograms[offs + c]; - sum += histograms[offs + c]; - } - double cu = wsum/sum; - if (sum == 0) { - cuof << names[g] << '\t' << "NA" << '\t' << sum << '\n'; - } else { - cuof << names[g] << '\t' << cu << '\t' << sum << '\n'; - } - } - cuof.close(); - - //write cu per cell file - - std::ofstream cupcof; - cupcof.open(cu_per_cell_ofn); - //write header - cupcof << "barcode\tCU\tUMIs\n"; - - for (size_t bc = 0; bc < barcodes.size(); ++bc) { - cupcof << binaryToString(barcodes[bc], bclen) << '\t' << double(cellCounts[bc]) / double(cellUMIs[bc]) << '\t' << cellUMIs[bc] << '\n'; - } - cupcof.close(); - } - - - //std::cerr << "bad counts = " << bad_count <<", rescued =" << rescued << ", compacted = " << compacted << std::endl; - - //std::cerr << "Read in " << nr << " BUS records" << std::endl; -} - -void bustools_count_mult(Bustools_opt &opt) { - BUSHeader h; - size_t nr = 0; - size_t N = 100000; - uint32_t bclen = 0; - BUSData* p = new BUSData[N]; - - // read and parse the equivalence class files - - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; - std::vector> ecmap; - - std::unordered_map txnames; - parseTranscripts(opt.count_txp, txnames); - std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; - parseGenes(opt.count_genes, txnames, genemap, genenames); - parseECs(opt.count_ecs, h); - ecmap = std::move(h.ecs); - ecmapinv.reserve(ecmap.size()); - for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); - } - std::vector> ec2genes; - create_ec2genes(ecmap, genemap, ec2genes); - - - std::ofstream of; - std::string mtx_ofn = opt.output + ".mtx"; - std::string barcodes_ofn = opt.output + ".barcodes.txt"; - std::string ec_ofn = opt.output + ".ec.txt"; - std::string gene_ofn = opt.output + ".genes.txt"; - of.open(mtx_ofn); - - // write out the initial header - of << "%%MatrixMarket matrix coordinate real general\n%\n"; - // number of genes - auto mat_header_pos = of.tellp(); - std::string dummy_header(66, '\n'); - for (int i = 0; i < 33; i++) { - dummy_header[2*i] = '%'; - } - of.write(dummy_header.c_str(), dummy_header.size()); - - - size_t n_cols = 0; - size_t n_rows = 0; - size_t n_entries = 0; - std::vector v; - v.reserve(N); - uint64_t current_bc = 0xFFFFFFFFFFFFFFFFULL; - //temporary data - std::vector ecs; - std::vector glist; - ecs.reserve(100); - std::vector u; - u.reserve(100); - std::vector column_v; - std::vector> column_vp; - if (!opt.count_collapse) { - column_vp.reserve(N); - } else { - column_vp.reserve(N); - glist.reserve(100); - } - //barcodes - std::vector barcodes; - int bad_count = 0; - int compacted = 0; - int 
rescued = 0; - - - auto write_barcode_matrix = [&](const std::vector &v) { + auto write_barcode_matrix_mult = [&](const std::vector &v) { if(v.empty()) { return; } @@ -724,7 +483,7 @@ void bustools_count_mult(Bustools_opt &opt) { } }; - auto write_barcode_matrix_collapsed = [&](const std::vector &v) { + auto write_barcode_matrix_collapsed_mult = [&](const std::vector &v) { if(v.empty()) { return; } @@ -734,7 +493,7 @@ void bustools_count_mult(Bustools_opt &opt) { barcodes.push_back(v[0].barcode); double val = 0.0; size_t n = v.size(); - + for (size_t i = 0; i < n; i++) { ecs.resize(0); ecs.push_back(v[i].ec); @@ -753,12 +512,12 @@ void bustools_count_mult(Bustools_opt &opt) { } } } - + std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); std::unordered_map col_map(m); std::vector cols; - + for (size_t i = 0; i < m; ) { size_t j = i+1; double val = column_vp[i].second; @@ -770,14 +529,12 @@ void bustools_count_mult(Bustools_opt &opt) { } col_map.insert({column_vp[i].first,val}); cols.push_back(column_vp[i].first); - + n_entries++; i = j; // increment } - - - + for (const auto &x : cols) { double val = 0; auto it = col_map.find(x); @@ -786,7 +543,7 @@ void bustools_count_mult(Bustools_opt &opt) { } of << n_rows << " " << (x+1) << " " << val << "\n"; } - + }; for (const auto& infn : opt.files) { @@ -818,11 +575,13 @@ void bustools_count_mult(Bustools_opt &opt) { // output whatever is in v if (!v.empty()) { if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } + if (!opt.count_cm) write_barcode_matrix(v); + else write_barcode_matrix_mult(v); + } else { + if (!opt.count_cm) write_barcode_matrix_collapsed(v); + else write_barcode_matrix_collapsed_mult(v); } + } v.clear(); current_bc = p[i].barcode; } @@ -832,9 +591,11 @@ void bustools_count_mult(Bustools_opt &opt) { } if (!v.empty()) { if (!opt.count_collapse) { - write_barcode_matrix(v); + if (!opt.count_cm) write_barcode_matrix(v); + else write_barcode_matrix_mult(v); } else { - write_barcode_matrix_collapsed(v); + if (!opt.count_cm) write_barcode_matrix_collapsed(v); + else write_barcode_matrix_collapsed_mult(v); } } @@ -852,17 +613,14 @@ void bustools_count_mult(Bustools_opt &opt) { of.close(); + //Rewrite header in a way that works for both Windows and Linux std::stringstream ss; - ss << n_rows << " " << n_cols << " " << n_entries << "\n"; + ss << n_rows << " " << n_cols << " " << n_entries; std::string header = ss.str(); int hlen = header.size(); - assert(hlen < 66); - of.open(mtx_ofn, std::ios::binary | std::ios::in | std::ios::out); - of.seekp(mat_header_pos); - of.write("%",1); - of.write(std::string(66-hlen-2,' ').c_str(),66-hlen-2); - of.write("\n",1); - of.write(header.c_str(), hlen); + header = header + std::string(66 - hlen, ' ') + '\n'; + of.open(mtx_ofn, std::ios::in | std::ios::out); + of << headerComments << header; of.close(); // write updated ec file @@ -879,7 +637,92 @@ void bustools_count_mult(Bustools_opt &opt) { bcof << binaryToString(x, bclen) << "\n"; } bcof.close(); + + //write histogram file + if (opt.count_gen_hist) { + std::ofstream histof; + histof.open(hist_ofn); + + for (size_t g = 0; g < genenames.size(); ++g) { + //Indexed as gene*histmax + histIndex + unsigned int offs = g * histmax; + + //first figure out the length of the histogram, don't write that to make the file smaller + unsigned int histEnd = histmax - 1; + for (; histEnd != 0; --histEnd) { + if (histograms[offs + histEnd] != 0) { + break; + } + } + for (size_t c = 0; c <= 
histEnd; ++c) { + if (c != 0) { + histof << '\t'; + } + histof << histograms[offs + c]; + } + + histof << "\n"; + } + histof.close(); + } + + if (opt.count_gen_hist) { + //write mean counts per UMI file (per gene) + + std::ofstream cuof; + cuof.open(cu_ofn); + //write header + cuof << "gene\tCU\tUMIs\n"; + + //prepare gene names for writing + std::vector names; + names.resize(genenames.size()); + for (const auto &x : genenames) { + if (x.second >= 0) { + names[x.second] = x.first; + } + } + + + for (size_t g = 0; g < genenames.size(); ++g) { + //Indexed as gene*histmax + histIndex + unsigned int offs = g * histmax; + + //calculate counts per UMI as the mean of the histogram + double wsum = 0; + double sum = 0; + for (size_t c = 0; c < histmax; ++c) { + wsum += double(c+1) * histograms[offs + c]; + sum += histograms[offs + c]; + } + double cu = wsum/sum; + if (sum == 0) { + cuof << names[g] << '\t' << "NA" << '\t' << sum << '\n'; + } else { + cuof << names[g] << '\t' << cu << '\t' << sum << '\n'; + } + } + cuof.close(); + + //write cu per cell file + + std::ofstream cupcof; + cupcof.open(cu_per_cell_ofn); + //write header + cupcof << "barcode\tCU\tUMIs\n"; + + for (size_t bc = 0; bc < barcodes.size(); ++bc) { + cupcof << binaryToString(barcodes[bc], bclen) << '\t' << double(cellCounts[bc]) / double(cellUMIs[bc]) << '\t' << cellUMIs[bc] << '\n'; + } + cupcof.close(); + } + + //std::cerr << "bad counts = " << bad_count <<", rescued =" << rescued << ", compacted = " << compacted << std::endl; //std::cerr << "Read in " << nr << " BUS records" << std::endl; } + +void bustools_count_mult(Bustools_opt &opt) { + bustools_count(opt); +} diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index ab954a8..853bcd2 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1770,6 +1770,15 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } + if (opt.count_split.size() != 0) + { + if (!checkFileExists(opt.count_split)) + { + std::cerr << "Error: File not found " << opt.count_split << std::endl; + ret = false; + } + } + return ret; } @@ -2657,6 +2666,7 @@ void Bustools_count_Usage() << " --umi-gene Perform gene-level collapsing of UMIs" << std::endl << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl + << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 26999ad51f8d9959b1e32c85c5999dbbf6f36fe7 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 10:16:06 -0800 Subject: [PATCH 02/49] Preliminary untested count split matrix option --- src/Common.cpp | 27 ++++++ src/Common.hpp | 8 ++ src/bustools_count.cpp | 212 +++++++++++++++++++++++------------------ src/bustools_main.cpp | 4 + 4 files changed, 160 insertions(+), 91 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index ad96c96..717ee24 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -327,6 +327,33 @@ void create_ec2genes(const std::vector> &ecmap, const std:: } } +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split) { + if (tx_split.size() == 0) return COUNT_DEFAULT; + std::vector ecs; + ecs.push_back(ec); + return intersect_ecs_with_subset_txs(ecs, 
ecmap, tx_split); +} + +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split) { + if (tx_split.size() == 0) return COUNT_DEFAULT; + if (ecs.size() == 0) return COUNT_AMBIGUOUS; // Shouldn't happen + size_t n_1 = 0; + size_t n_2 = 0; + for (auto ec : ecs) { // We still need to optimize this + for (auto t: ecmap[ec]) { + if(std::find(tx_split.begin(), tx_split.end(), t) != tx_split.end()) { + n_2++; + } else { + n_1++; + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); +} + + void copy_file(std::string src, std::string dest) { std::ifstream isrc(src, std::ios::binary); std::ofstream idest(dest, std::ios::binary); diff --git a/src/Common.hpp b/src/Common.hpp index e6d13c7..006dd80 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -35,6 +35,12 @@ enum PROJECT_TYPE : char PROJECT_TX, PROJECT_F }; +enum COUNT_MTX_TYPE : char +{ + COUNT_DEFAULT = 0, + COUNT_SPLIT, + COUNT_AMBIGUOUS +}; struct Bustools_opt { @@ -168,6 +174,8 @@ void vt2gene(const std::vector &v, const std::vector &genemap, void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); void copy_file(std::string src, std::string dest); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 244ba45..e0d787c 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -23,10 +23,10 @@ void bustools_count(Bustools_opt &opt) { std::unordered_map txnames; auto txnames_split = txnames; // copy + std::vector tx_split; + tx_split.reserve(txnames_split.size()); + for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); - if (!opt.count_split.empty()) { - parseTranscripts(opt.count_split, txnames_split); // subset of txnames - } std::vector genemap(txnames.size(), -1); std::unordered_map genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); @@ -40,15 +40,19 @@ void bustools_count(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); + bool count_split = !opt.count_split.empty(); std::ofstream of; + std::ofstream of_2; + std::ofstream of_A; std::string mtx_ofn = opt.output + ".mtx"; + std::string mtx_ofn_split_2 = opt.output + ".2.mtx"; + std::string mtx_ofn_split_A = opt.output + ".ambiguous.mtx"; std::string barcodes_ofn = opt.output + ".barcodes.txt"; std::string ec_ofn = opt.output + ".ec.txt"; std::string gene_ofn = opt.output + ".genes.txt"; std::string hist_ofn = opt.output + ".hist.txt"; std::string cu_per_cell_ofn = opt.output + ".CUPerCell.txt"; std::string cu_ofn = opt.output + ".cu.txt"; - of.open(mtx_ofn); // write out the initial header // keep the number of newlines constant, this way it will work for both Windows and Linux @@ -58,12 +62,22 @@ void bustools_count(Bustools_opt &opt) { ssHeader << headerComments; 
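// A minimal, self-contained sketch of the classification rule behind the new
// -s/--split option: a record whose transcripts all fall outside the supplied
// subset is counted in the default matrix, one whose transcripts all fall
// inside goes to the split (.2) matrix, and a record touching both goes to the
// ambiguous matrix. It mirrors intersect_ecs_with_subset_txs() above, but the
// names here (classify_txs, the sample ids) are illustrative only, and
// std::binary_search assumes a sorted subset where the patch itself uses a
// linear std::find.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

enum SPLIT_CLASS : char { CLASS_DEFAULT = 0, CLASS_SPLIT, CLASS_AMBIGUOUS };

// txs: transcript ids supporting one barcode/UMI record
// tx_split: sorted transcript ids taken from the file passed to -s/--split
SPLIT_CLASS classify_txs(const std::vector<int32_t>& txs,
                         const std::vector<int32_t>& tx_split) {
  if (tx_split.empty()) return CLASS_DEFAULT;  // no split requested
  bool in_default = false, in_split = false;
  for (int32_t t : txs) {
    if (std::binary_search(tx_split.begin(), tx_split.end(), t)) in_split = true;
    else in_default = true;
    if (in_default && in_split) return CLASS_AMBIGUOUS;  // stop early, as above
  }
  return in_split ? CLASS_SPLIT : CLASS_DEFAULT;
}

int main() {
  const std::vector<int32_t> tx_split = {10, 11, 12};  // subset from --split
  assert(classify_txs({1, 2}, tx_split) == CLASS_DEFAULT);
  assert(classify_txs({10, 12}, tx_split) == CLASS_SPLIT);
  assert(classify_txs({2, 11}, tx_split) == CLASS_AMBIGUOUS);
  return 0;
}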
ssHeader << std::string(66, '%') << '\n'; size_t headerLength = ssHeader.str().length(); + // If we need to split matrix + if (count_split) { + parseTranscripts(opt.count_split, txnames_split); // subset of txnames + of_2.open(mtx_ofn_split_2); + of_A.open(mtx_ofn_split_A); + of_2 << ssHeader.str(); + of_A << ssHeader.str(); + } + of.open(mtx_ofn); of << ssHeader.str(); - size_t n_cols = 0; size_t n_rows = 0; size_t n_entries = 0; + size_t n_entries_2 = 0; + size_t n_entries_A = 0; std::vector v; v.reserve(N); uint64_t current_bc = 0xFFFFFFFFFFFFFFFFULL; @@ -74,7 +88,7 @@ void bustools_count(Bustools_opt &opt) { std::vector u; u.reserve(100); std::vector column_v; - std::vector> column_vp; + std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { column_v.reserve(N); } else { @@ -217,8 +231,11 @@ void bustools_count(Bustools_opt &opt) { } } double val = j-i; - of << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; - n_entries++; + auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split); + auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); + auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); + of_ << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; + n_entries_++; i = j; // increment } @@ -239,7 +256,7 @@ void bustools_count(Bustools_opt &opt) { std::vector> ambiguous_genes; - for (size_t i = 0; i < n; ) { + if (!opt.count_cm) for (size_t i = 0; i < n; ) { // Entire loop is for !opt.count_cm size_t j = i+1; for (; j < n; j++) { if (v[i].UMI != v[j].UMI) { @@ -270,9 +287,10 @@ void bustools_count(Bustools_opt &opt) { } } if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); if (opt.count_gene_multimapping) { for (auto x : glist) { - column_vp.push_back({x, (opt.count_raw_counts ? counts : 1.0)/gn}); + column_vp.push_back({x, {(opt.count_raw_counts ? counts : 1.0)/gn, which_mtx}}); } //Fill in histograms for prediction. if (opt.count_gen_hist) { @@ -288,7 +306,7 @@ void bustools_count(Bustools_opt &opt) { } } else { if (gn==1) { - column_vp.push_back({glist[0],opt.count_raw_counts ? counts : 1.0}); + column_vp.push_back({glist[0],{opt.count_raw_counts ? counts : 1.0, which_mtx}}); //Fill in histograms for prediction. 
if (opt.count_gen_hist) { if (glist[0] < n_genes) { //crasches with an invalid gene file otherwise @@ -334,13 +352,14 @@ void bustools_count(Bustools_opt &opt) { } gn = glist.size(); if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split); if (opt.count_gene_multimapping) { for (auto x : glist) { - column_vp.push_back({x, 1.0/gn}); + column_vp.push_back({x, {1.0/gn, which_mtx}}); } } else { if (gn==1) { - column_vp.push_back({glist[0],1.0}); + column_vp.push_back({glist[0],{1.0, which_mtx}}); } else if (opt.count_em) { ambiguous_genes.push_back(std::move(glist)); } @@ -349,25 +368,64 @@ void bustools_count(Bustools_opt &opt) { } } i = j; // increment + } else for (size_t i = 0; i < n; i++) { // Entire loop is for opt.count_cm + ecs.resize(0); + ecs.push_back(v[i].ec); + + intersect_genes_of_ecs(ecs, ec2genes, glist); + int gn = glist.size(); + if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + if (opt.count_gene_multimapping) { + for (auto x : glist) { + column_vp.push_back({x, {v[i].count/gn, which_mtx}}); + } + } else { + if (gn==1) { + column_vp.push_back({glist[0],{v[i].count, which_mtx}}); + } + } + } } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); std::unordered_map col_map(m); + auto col_map_2 = col_map; // copy + auto col_map_A = col_map; // copy std::vector cols; for (size_t i = 0; i < m; ) { size_t j = i+1; - double val = column_vp[i].second; + double val = 0; + double val_2 = 0; + double val_A = 0; + auto mtx_type = column_vp[i].second.second; + if (mtx_type == COUNT_DEFAULT) val = column_vp[i].second.first; + else if (mtx_type == COUNT_SPLIT) val_2 = column_vp[i].second.first; + else val_A = column_vp[i].second.first; for (; j < m; j++) { if (column_vp[i].first != column_vp[j].first) { break; } - val += column_vp[j].second; + auto mtx_type = column_vp[j].second.second; + if (mtx_type == COUNT_DEFAULT) val += column_vp[j].second.first; + else if (mtx_type == COUNT_SPLIT) val_2 += column_vp[j].second.first; + else val_A += column_vp[j].second.first; } col_map.insert({column_vp[i].first,val}); + if (count_split) { + col_map_2.insert({column_vp[i].first,val_2}); + col_map_A.insert({column_vp[i].first,val_A}); + } cols.push_back(column_vp[i].first); - n_entries++; + if (count_split) { + if (val > 0) n_entries++; + if (val_2 > 0) n_entries_2++; + if (val_A > 0) n_entries_A++; + } else { + n_entries++; + } i = j; // increment } @@ -380,7 +438,7 @@ void bustools_count(Bustools_opt &opt) { double val = 0; auto it = col_map.find(x); if (it != col_map.end()) { - val = it->second; + val = it->second.first; } c1.insert({x,val}); c2.insert({x,0.0}); @@ -430,15 +488,28 @@ void bustools_count(Bustools_opt &opt) { } - - for (const auto &x : cols) { double val = 0; auto it = col_map.find(x); - if (it != col_map.end()) { - val = it->second; + if (!count_split) { + if (it != col_map.end()) val = it->second; + of << n_rows << " " << (x+1) << " " << val << "\n"; + } else { + if (it != col_map.end()) { + val = it->second; + of << n_rows << " " << (x+1) << " " << val << "\n"; + } + it = col_map_2.find(x); + if (it != col_map_2.end()) { + val = it->second; + of_2 << n_rows << " " << (x+1) << " " << val << "\n"; + } + it = col_map_A.find(x); + if (it != col_map_A.end()) { + val = it->second; + of_A << n_rows << " " << (x+1) << " " << val << "\n"; + } } - of << n_rows << " " << (x+1) << " " << val << "\n"; } }; @@ -464,7 +535,7 @@ void bustools_count(Bustools_opt &opt) { continue; } } - 
column_vp.push_back({ec,v[i].count}); + column_vp.push_back({ec,{v[i].count,0}}); } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); @@ -477,74 +548,14 @@ void bustools_count(Bustools_opt &opt) { } val += column_vp[j].second; } - n_entries++; - of << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; + auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); + auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); + auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); + of_ << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; + n_entries_++; i = j; // increment } }; - - auto write_barcode_matrix_collapsed_mult = [&](const std::vector &v) { - if(v.empty()) { - return; - } - column_vp.resize(0); - n_rows+= 1; - - barcodes.push_back(v[0].barcode); - double val = 0.0; - size_t n = v.size(); - - for (size_t i = 0; i < n; i++) { - ecs.resize(0); - ecs.push_back(v[i].ec); - - intersect_genes_of_ecs(ecs, ec2genes, glist); - int gn = glist.size(); - if (gn > 0) { - if (opt.count_gene_multimapping) { - for (auto x : glist) { - column_vp.push_back({x, v[i].count/gn}); - } - } else { - if (gn==1) { - column_vp.push_back({glist[0],v[i].count}); - } - } - } - } - - std::sort(column_vp.begin(), column_vp.end()); - size_t m = column_vp.size(); - std::unordered_map col_map(m); - std::vector cols; - - for (size_t i = 0; i < m; ) { - size_t j = i+1; - double val = column_vp[i].second; - for (; j < m; j++) { - if (column_vp[i].first != column_vp[j].first) { - break; - } - val += column_vp[j].second; - } - col_map.insert({column_vp[i].first,val}); - cols.push_back(column_vp[i].first); - - n_entries++; - - i = j; // increment - } - - for (const auto &x : cols) { - double val = 0; - auto it = col_map.find(x); - if (it != col_map.end()) { - val = it->second; - } - of << n_rows << " " << (x+1) << " " << val << "\n"; - } - - }; for (const auto& infn : opt.files) { std::streambuf *inbuf; @@ -578,8 +589,7 @@ void bustools_count(Bustools_opt &opt) { if (!opt.count_cm) write_barcode_matrix(v); else write_barcode_matrix_mult(v); } else { - if (!opt.count_cm) write_barcode_matrix_collapsed(v); - else write_barcode_matrix_collapsed_mult(v); + write_barcode_matrix_collapsed(v); // Same signature for count_cm and !count_cm } } v.clear(); @@ -612,9 +622,13 @@ void bustools_count(Bustools_opt &opt) { } of.close(); + if (count_split) { + of_2.close(); + of_A.close(); + } //Rewrite header in a way that works for both Windows and Linux - std::stringstream ss; + std::stringstream ss, ss_2, ss_A; ss << n_rows << " " << n_cols << " " << n_entries; std::string header = ss.str(); int hlen = header.size(); @@ -622,6 +636,22 @@ void bustools_count(Bustools_opt &opt) { of.open(mtx_ofn, std::ios::in | std::ios::out); of << headerComments << header; of.close(); + if (count_split) { + ss_2 << n_rows << " " << n_cols << " " << n_entries_2; + header = ss_2.str(); + hlen = header.size(); + header = header + std::string(66 - hlen, ' ') + '\n'; + of_2.open(mtx_ofn_split_2, std::ios::in | std::ios::out); + of_2 << headerComments << header; + of_2.close(); + ss_A << n_rows << " " << n_cols << " " << n_entries_A; + header = ss_A.str(); + hlen = header.size(); + header = header + std::string(66 - hlen, ' ') + '\n'; + of_A.open(mtx_ofn_split_A, std::ios::in | std::ios::out); + of_A << headerComments << header; + of_A.close(); + } // write updated ec file h.ecs = 
std::move(ecmap); diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 853bcd2..2ffd492 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1777,6 +1777,10 @@ bool check_ProgramOptions_count(Bustools_opt &opt) std::cerr << "Error: File not found " << opt.count_split << std::endl; ret = false; } + if (opt.count_em) { + std::cerr << "Cannot use -s with --em" std::endl; + ret = false; + } } return ret; From 53b0acc550c9b74289b84faf60489fdfd2998f62 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 11:52:31 -0800 Subject: [PATCH 03/49] fix some typos --- src/bustools_count.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d787c..68e9bab 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -438,7 +438,7 @@ void bustools_count(Bustools_opt &opt) { double val = 0; auto it = col_map.find(x); if (it != col_map.end()) { - val = it->second.first; + val = it->second; } c1.insert({x,val}); c2.insert({x,0.0}); @@ -535,18 +535,18 @@ void bustools_count(Bustools_opt &opt) { continue; } } - column_vp.push_back({ec,{v[i].count,0}}); + column_vp.push_back({ec,{v[i].count,COUNT_DEFAULT}}); } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); for (size_t i = 0; i < m; ) { size_t j = i+1; - double val = column_vp[i].second; + double val = column_vp[i].second.first; for (; j < m; j++) { if (column_vp[i].first != column_vp[j].first) { break; } - val += column_vp[j].second; + val += column_vp[j].second.first; } auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); @@ -557,7 +557,7 @@ void bustools_count(Bustools_opt &opt) { } }; - for (const auto& infn : opt.files) { + for (const auto& infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; if (!opt.stream_in) { @@ -604,8 +604,7 @@ void bustools_count(Bustools_opt &opt) { if (!opt.count_cm) write_barcode_matrix(v); else write_barcode_matrix_mult(v); } else { - if (!opt.count_cm) write_barcode_matrix_collapsed(v); - else write_barcode_matrix_collapsed_mult(v); + write_barcode_matrix_collapsed(v); // Same signature for count_cm and !count_cm } } From 0d8cec30ffbcf983685e944324abc9cd4740c997 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 11:53:33 -0800 Subject: [PATCH 04/49] fix another typo --- src/bustools_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 2ffd492..384dc2f 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1778,7 +1778,7 @@ bool check_ProgramOptions_count(Bustools_opt &opt) ret = false; } if (opt.count_em) { - std::cerr << "Cannot use -s with --em" std::endl; + std::cerr << "Cannot use -s with --em" << std::endl; ret = false; } } From a2f38379bdc606effd27d5113c290f382be23f20 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:32:53 -0800 Subject: [PATCH 05/49] change unordered_map --- src/BUSData.cpp | 10 +- src/BUSData.h | 11 +- src/Common.hpp | 6 +- src/bustools_capture.cpp | 4 +- src/bustools_clusterhist.cpp | 8 +- src/bustools_collapse.cpp | 6 +- src/bustools_count.cpp | 10 +- src/bustools_inspect.cpp | 4 +- src/bustools_mash.cpp | 6 +- src/bustools_merge.cpp | 4 +- src/bustools_project.cpp | 12 +- src/bustools_umicorrect.cpp | 6 +- src/robin_hood.h | 2544 ++++++++++++++++++++++++++++++++++ 13 files changed, 2589 insertions(+), 42 
deletions(-) create mode 100644 src/robin_hood.h diff --git a/src/BUSData.cpp b/src/BUSData.cpp index c97f80f..397e6dd 100644 --- a/src/BUSData.cpp +++ b/src/BUSData.cpp @@ -258,7 +258,7 @@ bool writeECs(const std::string &filename, const BUSHeader &header) { return true; } -bool writeGenes(const std::string &filename, const std::unordered_map &genenames) { +bool writeGenes(const std::string &filename, const u_map_ &genenames) { std::ofstream outf; outf.open(filename.c_str(), std::ios::out); @@ -279,7 +279,7 @@ bool writeGenes(const std::string &filename, const std::unordered_map &txnames) { +bool parseTranscripts(const std::string &filename, u_map_ &txnames) { std::ifstream inf(filename.c_str()); int i = 0; @@ -291,7 +291,7 @@ bool parseTranscripts(const std::string &filename, std::unordered_map &txnames, std::unordered_set &captures) { +bool parseTxCaptureList(const std::string &filename, u_map_ &txnames, std::unordered_set &captures) { std::ifstream inf(filename.c_str()); std::string txp; @@ -318,7 +318,7 @@ bool parseBcUmiCaptureList(const std::string &filename, std::unordered_set &project_map) { +bool parse_ProjectMap(const std::string &filename, u_map_ &project_map) { // This function occurs in 3 places: here, BUSData.h, and bustools_project.cpp std::ifstream inf(filename.c_str()); @@ -346,7 +346,7 @@ bool parseFlagsCaptureList(const std::string &filename, std::unordered_set &txnames, std::vector &genemap, std::unordered_map &genenames) { +bool parseGenes(const std::string &filename, const u_map_ &txnames, std::vector &genemap, u_map_ &genenames) { std::ifstream inf(filename.c_str()); std::string line, t; diff --git a/src/BUSData.h b/src/BUSData.h index 5c697f7..227f889 100644 --- a/src/BUSData.h +++ b/src/BUSData.h @@ -7,6 +7,7 @@ #include #include #include +#include "Common.hpp" const uint32_t BUSFORMAT_VERSION = 1; @@ -66,15 +67,15 @@ int identifyParseHeader(std::istream &inf, BUSHeader &header, compressed_BUSHead bool parseECs_stream(std::istream &in, BUSHeader &header); bool parseECs(const std::string &filename, BUSHeader &header); bool writeECs(const std::string &filename, const BUSHeader &header); -bool writeGenes(const std::string &filename, const std::unordered_map &genenames); -bool parseGenes(const std::string &filename, const std::unordered_map &txnames, std::vector &genemap, std::unordered_map &genenames); +bool writeGenes(const std::string &filename, const u_map_ &genenames); +bool parseGenes(const std::string &filename, const u_map_ &txnames, std::vector &genemap, u_map_ &genenames); bool parseGenesList(const std::string& filename, std::vector& geneNames); -bool parseTxCaptureList(const std::string &filename, std::unordered_map &txnames, std::unordered_set &captures); +bool parseTxCaptureList(const std::string &filename, u_map_ &txnames, std::unordered_set &captures); bool parseBcUmiCaptureList(const std::string &filename, std::unordered_set &captures); bool parseFlagsCaptureList(const std::string &filename, std::unordered_set &captures); -bool parseTranscripts(const std::string &filename, std::unordered_map &txnames); +bool parseTranscripts(const std::string &filename, u_map_ &txnames); -bool parse_ProjectMap(const std::string &filename, std::unordered_map &project_map); +bool parse_ProjectMap(const std::string &filename, u_map_ &project_map); uint64_t stringToBinary(const std::string &s, uint32_t &flag); uint64_t stringToBinary(const char* s, const size_t len, uint32_t &flag); diff --git a/src/Common.hpp b/src/Common.hpp index 006dd80..940a4a8 100644 --- 
a/src/Common.hpp +++ b/src/Common.hpp @@ -9,9 +9,11 @@ #include #include #include +#include "robin_hood.h" #define BUSTOOLS_VERSION "0.42.0" +typedef robin_hood::unordered_flat_map u_map_; enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -169,10 +171,10 @@ struct SortedVectorHasher std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); diff --git a/src/bustools_capture.cpp b/src/bustools_capture.cpp index d03e1fd..a90ff2e 100644 --- a/src/bustools_capture.cpp +++ b/src/bustools_capture.cpp @@ -12,11 +12,11 @@ void bustools_capture(Bustools_opt &opt) { std::unordered_set captures; std::vector> ecmap; - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; if (opt.type == CAPTURE_TX) { // parse ecmap and capture list - std::unordered_map txnames; + u_map_ txnames; std::cerr << "Parsing transcripts .. 
"; std::cerr.flush(); parseTranscripts(opt.count_txp, txnames); std::cerr << "done" << std::endl; diff --git a/src/bustools_clusterhist.cpp b/src/bustools_clusterhist.cpp index 2bbb667..591eeeb 100644 --- a/src/bustools_clusterhist.cpp +++ b/src/bustools_clusterhist.cpp @@ -18,13 +18,13 @@ void bustools_clusterhist(Bustools_opt& opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -52,7 +52,7 @@ void bustools_clusterhist(Bustools_opt& opt) { //Read the cluster file std::vector clusterNames; - std::unordered_map bcClusters; + u_map_ bcClusters; { std::ifstream ifs(opt.cluster_input_file); uint32_t flag = 0; diff --git a/src/bustools_collapse.cpp b/src/bustools_collapse.cpp index 2111c9b..120d0ef 100644 --- a/src/bustools_collapse.cpp +++ b/src/bustools_collapse.cpp @@ -17,13 +17,13 @@ void bustools_collapse(Bustools_opt &opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 68e9bab..b4a367f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -18,17 +18,17 @@ void bustools_count(Bustools_opt &opt) { // read and parse the equivalence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; auto txnames_split = txnames; // copy std::vector tx_split; tx_split.reserve(txnames_split.size()); for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -389,7 +389,7 @@ void bustools_count(Bustools_opt &opt) { } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); - std::unordered_map col_map(m); + u_map_ col_map(m); auto col_map_2 = col_map; // copy auto col_map_A = col_map; // copy std::vector cols; @@ -432,7 +432,7 @@ void bustools_count(Bustools_opt &opt) { if (opt.count_em) { //std::cerr << "Running EM algorithm" << std::endl; - std::unordered_map c1,c2; + u_map_ c1,c2; // initialize with unique counts for (const auto &x : cols) { double val = 0; diff --git a/src/bustools_inspect.cpp b/src/bustools_inspect.cpp index 25694cf..2808474 100644 --- a/src/bustools_inspect.cpp +++ b/src/bustools_inspect.cpp @@ -100,7 +100,7 @@ void bustools_inspect(Bustools_opt &opt) { int64_t gt_records = 0; /* Frequency of number of targets per set, with multiplicity. */ - std::unordered_map freq_targetsPerSet; + u_map_ freq_targetsPerSet; /* Frequency of targets (for Good-Toulmin). 
*/ std::vector freq_targets(numTargets, 0); @@ -273,7 +273,7 @@ void bustools_inspect(Bustools_opt &opt) { // Good-Toulmin for number of targets // Also number of targets detected uint64_t targetsDetected = 0; - std::unordered_map freq_freq_targets; + u_map_ freq_freq_targets; for (const auto &elt : freq_targets) { if (elt) { ++targetsDetected; diff --git a/src/bustools_mash.cpp b/src/bustools_mash.cpp index d9c3d17..8faa65d 100644 --- a/src/bustools_mash.cpp +++ b/src/bustools_mash.cpp @@ -12,7 +12,7 @@ #include "bustools_merge.h" -inline std::vector get_tids(const BUSHeader &oh, const std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, const int32_t &eid) +inline std::vector get_tids(const BUSHeader &oh, const u_map_, int32_t, SortedVectorHasher> &ecmapinv, const int32_t &eid) { std::vector tids = oh.ecs[eid]; @@ -56,7 +56,7 @@ void bustools_mash(const Bustools_opt &opt) std::cerr << "[info] parsed output.bus files" << std::endl; // parse the transcripts.txt - std::unordered_map txn_tid; + u_map_ txn_tid; std::vector> tids_per_file; // list of tids as they occur for each file std::vector tids; // a vector of tids int32_t tid = 0; @@ -101,7 +101,7 @@ void bustools_mash(const Bustools_opt &opt) oh.bclen = vh[0].bclen; oh.umilen = vh[0].umilen; - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; // set{tids} (ec) to eid it came from + u_map_, int32_t, SortedVectorHasher> ecmapinv; // set{tids} (ec) to eid it came from for (int32_t i = 0; i < tid; i++) { diff --git a/src/bustools_merge.cpp b/src/bustools_merge.cpp index f330efe..c89eae3 100644 --- a/src/bustools_merge.cpp +++ b/src/bustools_merge.cpp @@ -57,7 +57,7 @@ void bustools_merge_different_index(const Bustools_opt &opt) std::ifstream ifn(opt.count_txp); std::string txn; int32_t tid; - std::unordered_map txn_tid; + u_map_ txn_tid; std::vector tids; // insert tids into a vector @@ -81,7 +81,7 @@ void bustools_merge_different_index(const Bustools_opt &opt) BUSHeader h, bh; parseECs(opt.count_ecs, h); // put the ecs into a ecmap inv - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; for (std::size_t ec = 0; ec < h.ecs.size(); ec++) { diff --git a/src/bustools_project.cpp b/src/bustools_project.cpp index c5b1979..6d1e195 100644 --- a/src/bustools_project.cpp +++ b/src/bustools_project.cpp @@ -20,7 +20,7 @@ void bustools_project(Bustools_opt &opt) { size_t stat_map = 0; size_t stat_unmap = 0; - std::unordered_map project_map; + u_map_ project_map; /* Load the map into project_map variable parse bus records and map each object (barcode, umi) with project_map @@ -175,18 +175,18 @@ void bustools_project(Bustools_opt &opt) { } if (opt.type == PROJECT_TX) { std::ofstream of; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.map, txnames, genemap, genenames); std::vector genenamesinv(genenames.size(), ""); for (const auto &gene : genenames) { genenamesinv[gene.second] = gene.first; } - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -199,7 +199,7 @@ void bustools_project(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); std::vector> geneEc2genes = ec2genes; - std::unordered_map, int32_t, SortedVectorHasher> geneEc2genesinv; + u_map_, int32_t, SortedVectorHasher> 
geneEc2genesinv; std::sort(geneEc2genes.begin(), geneEc2genes.end()); auto firstNonempty = geneEc2genes.begin(); while (firstNonempty->size() == 0 && firstNonempty != geneEc2genes.end()) { @@ -284,7 +284,7 @@ void bustools_project(Bustools_opt &opt) { BUSData *p = new BUSData[N]; BUSData currRec; // Gene EC --> counts for current barcode/UMI pair - std::unordered_map counts; + u_map_ counts; while (true) { in.read((char*) p, N * sizeof(BUSData)); diff --git a/src/bustools_umicorrect.cpp b/src/bustools_umicorrect.cpp index 6efdadb..a6facbb 100644 --- a/src/bustools_umicorrect.cpp +++ b/src/bustools_umicorrect.cpp @@ -231,13 +231,13 @@ void bustools_umicorrect(const Bustools_opt& opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); diff --git a/src/robin_hood.h b/src/robin_hood.h new file mode 100644 index 0000000..0af031f --- /dev/null +++ b/src/robin_hood.h @@ -0,0 +1,2544 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2021 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
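// A small usage sketch of the swap this commit makes: robin_hood's
// unordered_flat_map exposes the operations bustools relies on (reserve,
// emplace, operator[], find), so replacing std::unordered_map behind the
// u_map_ alias in Common.hpp is mostly a type change at the call sites. The
// variadic alias below and the example transcript ids are assumptions for
// illustration, not the exact definition from the patch.
#include <cstdint>
#include <iostream>
#include <string>
#include "robin_hood.h"  // this header, added as src/robin_hood.h

template <typename K, typename V, typename... Rest>
using u_map_ = robin_hood::unordered_flat_map<K, V, Rest...>;

int main() {
  u_map_<std::string, int32_t> txnames;  // transcript name -> id, as in parseTranscripts
  txnames.reserve(2);
  txnames.emplace("ENST00000000001", 0);
  txnames["ENST00000000002"] = 1;
  auto it = txnames.find("ENST00000000001");
  if (it != txnames.end()) {
    std::cout << it->first << " -> " << it->second << "\n";
  }
  return 0;
}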
+ +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include // only to support hash of smart pointers +#include +#include +#include +#include +#if __cplusplus >= 201703L +# include +#endif + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// #define ROBIN_HOOD_TRACE_ENABLED +#ifdef ROBIN_HOOD_TRACE_ENABLED +# include +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_TRACE(x) +#endif + +// #define ROBIN_HOOD_COUNT_ENABLED +#ifdef ROBIN_HOOD_COUNT_ENABLED +# include +# define ROBIN_HOOD_COUNT(x) ++counts().x; +namespace robin_hood { +struct Counts { + uint64_t shiftUp{}; + uint64_t shiftDown{}; +}; +inline std::ostream& operator<<(std::ostream& os, Counts const& c) { + return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; +} + +static Counts& counts() { + static Counts counts{}; + return counts; +} +} // namespace robin_hood +#else +# define ROBIN_HOOD_COUNT(x) +#endif + +// all non-argument macros should use this facility. See +// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ +#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#endif + +// inline +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) +#endif + +// exceptions +#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 +#endif + +// count leading/trailing bits +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? 
static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) +# else +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) +# endif +#endif + +// fallthrough +#ifndef __has_cpp_attribute // For backwards compatibility +# define __has_cpp_attribute(x) 0 +#endif +#if __has_cpp_attribute(clang::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() +#endif + +// likely/unlikely +#ifdef _MSC_VER +# define ROBIN_HOOD_LIKELY(condition) condition +# define ROBIN_HOOD_UNLIKELY(condition) condition +#else +# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) +# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) +#endif + +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + +// workaround missing "is_trivially_copyable" in g++ < 5.0 +// See https://stackoverflow.com/a/31798726/48181 +#if defined(__GNUC__) && __GNUC__ < 5 +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) 
std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() +#endif + +namespace robin_hood { + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) +# define ROBIN_HOOD_STD std +#else + +// c++11 compatibility layer +namespace ROBIN_HOOD_STD { +template +struct alignment_of + : std::integral_constant::type)> {}; + +template +class integer_sequence { +public: + using value_type = T; + static_assert(std::is_integral::value, "not integral type"); + static constexpr std::size_t size() noexcept { + return sizeof...(Ints); + } +}; +template +using index_sequence = integer_sequence; + +namespace detail_ { +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); + + template + struct IntSeqCombiner; + + template + struct IntSeqCombiner, integer_sequence> { + using TResult = integer_sequence; + }; + + using TResult = + typename IntSeqCombiner::TResult, + typename IntSeqImpl::TResult>::TResult; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; +} // namespace detail_ + +template +using make_integer_sequence = typename detail_::IntSeqImpl::TResult; + +template +using make_index_sequence = make_integer_sequence; + +template +using index_sequence_for = make_index_sequence; + +} // namespace ROBIN_HOOD_STD + +#endif + +namespace detail { + +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; +#else +using SizeT = uint32_t; +#endif + +template +T rotr(T x, unsigned k) { + return (x >> k) | (x << (8U * sizeof(T) - k)); +} + +// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to +// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with +// care! +template +inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { + return reinterpret_cast(ptr); +} + +template +inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { + return reinterpret_cast(ptr); +} + +// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other +// inlinings more difficult. Throws are also generally the slow path. +template +[[noreturn]] ROBIN_HOOD(NOINLINE) +#if ROBIN_HOOD(HAS_EXCEPTIONS) + void doThrow(Args&&... args) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) + throw E(std::forward(args)...); +} +#else + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + abort(); +} +#endif + +template +T* assertNotNull(T* t, Args&&... 
args) { + if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { + doThrow(std::forward(args)...); + } + return t; +} + +template +inline T unaligned_load(void const* ptr) noexcept { + // using memcpy so we don't get into unaligned load problems. + // compiler should optimize this very well anyways. + T t; + std::memcpy(&t, ptr, sizeof(T)); + return t; +} + +// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, +// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a +// pointer. +template +class BulkPoolAllocator { +public: + BulkPoolAllocator() noexcept = default; + + // does not copy anything, just creates a new allocator. + BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept + : mHead(nullptr) + , mListForFree(nullptr) {} + + BulkPoolAllocator(BulkPoolAllocator&& o) noexcept + : mHead(o.mHead) + , mListForFree(o.mListForFree) { + o.mListForFree = nullptr; + o.mHead = nullptr; + } + + BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { + reset(); + mHead = o.mHead; + mListForFree = o.mListForFree; + o.mListForFree = nullptr; + o.mHead = nullptr; + return *this; + } + + BulkPoolAllocator& + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { + // does not do anything + return *this; + } + + ~BulkPoolAllocator() noexcept { + reset(); + } + + // Deallocates all allocated memory. + void reset() noexcept { + while (mListForFree) { + T* tmp = *mListForFree; + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); + mListForFree = reinterpret_cast_no_cast_align_warning(tmp); + } + mHead = nullptr; + } + + // allocates, but does NOT initialize. Use in-place new constructor, e.g. + // T* obj = pool.allocate(); + // ::new (static_cast(obj)) T(); + T* allocate() { + T* tmp = mHead; + if (!tmp) { + tmp = performAllocation(); + } + + mHead = *reinterpret_cast_no_cast_align_warning(tmp); + return tmp; + } + + // does not actually deallocate but puts it in store. + // make sure you have already called the destructor! e.g. with + // obj->~T(); + // pool.deallocate(obj); + void deallocate(T* obj) noexcept { + *reinterpret_cast_no_cast_align_warning(obj) = mHead; + mHead = obj; + } + + // Adds an already allocated block of memory to the allocator. This allocator is from now on + // responsible for freeing the data (with free()). If the provided data is not large enough to + // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. + void addOrFree(void* ptr, const size_t numBytes) noexcept { + // calculate number of available elements in ptr + if (numBytes < ALIGNMENT + ALIGNED_SIZE) { + // not enough data for at least one element. Free and return. + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } else { + ROBIN_HOOD_LOG("add to buffer") + add(ptr, numBytes); + } + } + + void swap(BulkPoolAllocator& other) noexcept { + using std::swap; + swap(mHead, other.mHead); + swap(mListForFree, other.mListForFree); + } + +private: + // iterates the list of allocated memory to calculate how many to alloc next. + // Recalculating this each time saves us a size_t member. + // This ignores the fact that memory blocks might have been added manually with addOrFree. In + // practice, this should not matter much. 
+ ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { + auto tmp = mListForFree; + size_t numAllocs = MinNumAllocs; + + while (numAllocs * 2 <= MaxNumAllocs && tmp) { + auto x = reinterpret_cast(tmp); + tmp = *x; + numAllocs *= 2; + } + + return numAllocs; + } + + // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). + void add(void* ptr, const size_t numBytes) noexcept { + const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; + + auto data = reinterpret_cast(ptr); + + // link free list + auto x = reinterpret_cast(data); + *x = mListForFree; + mListForFree = data; + + // create linked list for newly allocated data + auto* const headT = + reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); + + auto* const head = reinterpret_cast(headT); + + // Visual Studio compiler automatically unrolls this loop, which is pretty cool + for (size_t i = 0; i < numElements; ++i) { + *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = + head + (i + 1) * ALIGNED_SIZE; + } + + // last one points to 0 + *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = + mHead; + mHead = headT; + } + + // Called when no memory is available (mHead == 0). + // Don't inline this slow path. + ROBIN_HOOD(NOINLINE) T* performAllocation() { + size_t const numElementsToAlloc = calcNumElementsToAlloc(); + + // alloc new memory: [prev |T, T, ... T] + size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); + return mHead; + } + + // enforce byte alignment of the T's +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) + static constexpr size_t ALIGNMENT = + (std::max)(std::alignment_of::value, std::alignment_of::value); +#else + static const size_t ALIGNMENT = + (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) + ? ROBIN_HOOD_STD::alignment_of::value + : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround +#endif + + static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; + + static_assert(MinNumAllocs >= 1, "MinNumAllocs"); + static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); + static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); + static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); + static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); + + T* mHead{nullptr}; + T** mListForFree{nullptr}; +}; + +template +struct NodeAllocator; + +// dummy allocator that does nothing +template +struct NodeAllocator { + + // we are not using the data, so just free it. + void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making +// my own here. 
+namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) +using std::swap; +template +struct nothrow { + static const bool value = noexcept(swap(std::declval(), std::declval())); +}; +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif +} // namespace swappable + +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = T1; + using second_type = T2; + + template ::value && + std::is_default_constructible::value>::type> + constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) + : first() + , second() {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair const& o) noexcept( + noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) + : first(o.first) + , second(o.second) {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(o.first)) + , second(std::move(o.second)) {} + + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(a)) + , second(std::move(b)) {} + + template + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) + : first(std::forward(a)) + , second(std::forward(b)) {} + + template + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) + : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()) { + } + + // constructor called from the std::piecewise_construct_t ctor + template + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) + : first(std::forward(std::get(a))...) + , second(std::forward(std::get(b))...) { + // make visual studio compiler happy about warning about unused a & b. + // Visual studio's pair implementation disables warning 4100. 
+ (void)a; + (void)b; + } + + void swap(pair& o) noexcept((detail::swappable::nothrow::value) && + (detail::swappable::nothrow::value)) { + using std::swap; + swap(first, o.first); + swap(second, o.second); + } + + T1 first; // NOLINT(misc-non-private-member-variables-in-classes) + T2 second; // NOLINT(misc-non-private-member-variables-in-classes) +}; + +template +inline void swap(pair& a, pair& b) noexcept( + noexcept(std::declval&>().swap(std::declval&>()))) { + a.swap(b); +} + +template +inline constexpr bool operator==(pair const& x, pair const& y) { + return (x.first == y.first) && (x.second == y.second); +} +template +inline constexpr bool operator!=(pair const& x, pair const& y) { + return !(x == y); +} +template +inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( + std::declval() < std::declval()) && noexcept(std::declval() < + std::declval())) { + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +inline constexpr bool operator>(pair const& x, pair const& y) { + return y < x; +} +template +inline constexpr bool operator<=(pair const& x, pair const& y) { + return !(x > y); +} +template +inline constexpr bool operator>=(pair const& x, pair const& y) { + return !(x < y); +} + +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { + static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + static constexpr uint64_t seed = UINT64_C(0xe17a1465); + static constexpr unsigned int r = 47; + + auto const* const data64 = static_cast(ptr); + uint64_t h = seed ^ (len * m); + + size_t const n_blocks = len / 8; + for (size_t i = 0; i < n_blocks; ++i) { + auto k = detail::unaligned_load(data64 + i); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + auto const* const data8 = reinterpret_cast(data64 + n_blocks); + switch (len & 7U) { + case 7: + h ^= static_cast(data8[6]) << 48U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 6: + h ^= static_cast(data8[5]) << 40U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 5: + h ^= static_cast(data8[4]) << 32U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 4: + h ^= static_cast(data8[3]) << 24U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 3: + h ^= static_cast(data8[2]) << 16U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 2: + h ^= static_cast(data8[1]) << 8U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 1: + h ^= static_cast(data8[0]); + h *= m; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + default: + break; + } + + h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; + return static_cast(h); +} + +inline size_t hash_int(uint64_t x) noexcept { + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); +} + +// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
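
Two practical takeaways from the block above, before the std::hash wrapper that follows: the custom pair really is trivially copyable (which is the reason it exists), and hash_bytes()/hash_int() are plain free functions that can back a hasher for user-defined keys. A small sketch, with Coord, CoordHash and CoordEq as illustrative assumptions rather than anything from the header:

#include "robin_hood.h"
#include <cstddef>
#include <cstdint>
#include <type_traits>

// The custom pair is memcpy-safe, unlike std::pair on typical standard libraries.
static_assert(std::is_trivially_copyable<robin_hood::pair<int, int>>::value,
              "robin_hood::pair<int, int> can be relocated with std::memcpy");

struct Coord {                  // hypothetical key type
    int32_t x;
    int32_t y;
};

struct CoordHash {
    size_t operator()(Coord const& c) const noexcept {
        // Pack both fields into 64 bits and reuse hash_int(), the same
        // murmurhash3-style mixer the map applies to integral keys.
        uint64_t v = (static_cast<uint64_t>(static_cast<uint32_t>(c.x)) << 32U) |
                     static_cast<uint32_t>(c.y);
        return robin_hood::hash_int(v);
    }
};

struct CoordEq {
    bool operator()(Coord const& a, Coord const& b) const noexcept {
        return a.x == b.x && a.y == b.y;
    }
};

// e.g. robin_hood::unordered_flat_map<Coord, double, CoordHash, CoordEq>
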
+template +struct hash : public std::hash { + size_t operator()(T const& obj) const + noexcept(noexcept(std::declval>().operator()(std::declval()))) { + // call base hash + auto result = std::hash::operator()(obj); + // return mixed of that, to be save against identity has + return hash_int(static_cast(result)); + } +}; + +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); + } +}; + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + +template +struct hash { + size_t operator()(T* ptr) const noexcept { + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); + } +}; + +#define ROBIN_HOOD_HASH_INT(T) \ + template <> \ + struct hash { \ + size_t operator()(T const& obj) const noexcept { \ + return hash_int(static_cast(obj)); \ + } \ + } + +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif +// see https://en.cppreference.com/w/cpp/utility/hash +ROBIN_HOOD_HASH_INT(bool); +ROBIN_HOOD_HASH_INT(char); +ROBIN_HOOD_HASH_INT(signed char); +ROBIN_HOOD_HASH_INT(unsigned char); +ROBIN_HOOD_HASH_INT(char16_t); +ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) +ROBIN_HOOD_HASH_INT(wchar_t); +#endif +ROBIN_HOOD_HASH_INT(short); +ROBIN_HOOD_HASH_INT(unsigned short); +ROBIN_HOOD_HASH_INT(int); +ROBIN_HOOD_HASH_INT(unsigned int); +ROBIN_HOOD_HASH_INT(long); +ROBIN_HOOD_HASH_INT(long long); +ROBIN_HOOD_HASH_INT(unsigned long); +ROBIN_HOOD_HASH_INT(unsigned long long); +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif +namespace detail { + +template +struct void_type { + using type = void; +}; + +template +struct has_is_transparent : public std::false_type {}; + +template +struct has_is_transparent::type> + : public std::true_type {}; + +// using wrapper classes for hash and key_equal prevents the diamond problem when the same type +// is used. see https://stackoverflow.com/a/28771920/48181 +template +struct WrapHash : public T { + WrapHash() = default; + explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +template +struct WrapKeyEqual : public T { + WrapKeyEqual() = default; + explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +// A highly optimized hashmap implementation, using the Robin Hood algorithm. +// +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but +// be about 2x faster in most cases and require much less allocations. +// +// This implementation uses the following memory layout: +// +// [Node, Node, ... Node | info, info, ... infoSentinel ] +// +// * Node: either a DataNode that directly has the std::pair as member, +// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class Table + : public WrapHash, + public WrapKeyEqual, + detail::NodeAllocator< + typename std::conditional< + std::is_void::value, Key, + robin_hood::pair::type, T>>::type, + 4, 16384, IsFlat> { +public: + static constexpr bool is_flat = IsFlat; + static constexpr bool is_map = !std::is_void::value; + static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; + + using key_type = Key; + using mapped_type = T; + using value_type = typename std::conditional< + is_set, Key, + robin_hood::pair::type, T>>::type; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = Table; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + using WHash = WrapHash; + using WKeyEqual = WrapKeyEqual; + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr uint32_t InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = uint32_t; + + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode final { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( + noexcept(value_type(std::forward(args)...))) + : mData(std::forward(args)...) 
{} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( + std::is_nothrow_move_constructible::value) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} + void destroyDoNotDeallocate() noexcept {} + + value_type const* operator->() const noexcept { + return &mData; + } + value_type* operator->() noexcept { + return &mData; + } + + const value_type& operator*() const noexcept { + return mData; + } + + value_type& operator*() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData.second; + } + + void swap(DataNode& o) noexcept( + noexcept(std::declval().swap(std::declval()))) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept + : mData(std::move(n.mData)) {} + + void destroy(M& map) noexcept { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() noexcept { + mData->~value_type(); + } + + value_type const* operator->() const noexcept { + return mData; + } + + value_type* operator->() noexcept { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData->second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData->second; + } + + void swap(DataNode& o) noexcept { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) + ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { + return n.getFirst(); + } + + // in case we have void mapped_type, we are not using a pair, thus we just route k through. + // No need to disable this because it's just not used if not applicable. 
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { + return k; + } + + // in case we have non-void mapped_type, we have a standard robin_hood::pair + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, key_type const&>::type + getFirstConst(value_type const& vt) const noexcept { + return vt.first; + } + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); + auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); + std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& s, M& t) const { + auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); + std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); + + for (size_t i = 0; i < numElementsWithBuffer; ++i) { + if (t.mInfo[i]) { + ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + struct fast_forward_tag {}; + + // generic iterator for both const_iterator and iterator. + template + // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() = default; + + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. + + // Conversion constructor from iterator to const_iterator. 
+ template ::type> + // NOLINTNEXTLINE(hicpp-explicit-conversions) + Iter(Iter const& other) noexcept + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr, + fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) { + fastForward(); + } + + template ::type> + Iter& operator=(Iter const& other) noexcept { + mKeyVals = other.mKeyVals; + mInfo = other.mInfo; + return *this; + } + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() noexcept { + mInfo++; + mKeyVals++; + fastForward(); + return *this; + } + + Iter operator++(int) noexcept { + Iter tmp = *this; + ++(*this); + return tmp; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const noexcept { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const noexcept { + return mKeyVals != o.mKeyVals; + } + + private: + // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. + void fastForward() noexcept { + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } +#else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; +#endif + } + + friend class Table; + NodePtr mKeyVals{nullptr}; + uint8_t const* mInfo{nullptr}; + }; + + //////////////////////////////////////////////////////////////////// + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const noexcept { + *idx = *idx + 1; + *info += mInfoInc; + } + + void nextWhileLess(InfoType* info, size_t* idx) const noexcept { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. 
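
The keyToIdx() logic above is the heart of the addressing scheme: one extra multiplicative mixing step, then the low 5 bits of the hash feed the info byte and the remaining bits select the bucket. Below is a standalone arithmetic sketch of that split, assuming the initial table configuration (mInfoHashShift == 0, mInfoInc == 32) with the constants written out explicitly; it is not the class's private method.

#include <cstddef>
#include <cstdint>

// Mirrors keyToIdx() for the initial table state.
void split_hash(uint64_t h, size_t mask, size_t* idx, uint32_t* info) {
    h *= UINT64_C(0xc4ceb9fe1a85ec53); // initial mHashMultiplier
    h ^= h >> 33U;                     // extra mixing, guards against weak user hashes

    const uint32_t infoInc  = 1U << 5U;     // InitialInfoInc
    const uint32_t infoMask = infoInc - 1U; // InfoMask: the low 5 hash bits

    // info byte: distance-from-home in steps of infoInc, plus 5 hash bits
    // that let lookups skip most full key comparisons.
    *info = infoInc + static_cast<uint32_t>(h & infoMask);
    // bucket index: the remaining hash bits, folded into the 2^n-sized array.
    *idx = (static_cast<size_t>(h) >> 5U) & mask;
}
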
+ void + shiftUp(size_t startIdx, + size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { + auto idx = startIdx; + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); + while (--idx != insertion_idx) { + mKeyVals[idx] = std::move(mKeyVals[idx - 1]); + } + + idx = startIdx; + while (idx != insertion_idx) { + ROBIN_HOOD_COUNT(shiftUp) + mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); + if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + --idx; + } + } + + void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { + // until we find one that is either empty or has zero offset. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + while (mInfo[idx + 1] >= 2 * mInfoInc) { + ROBIN_HOOD_COUNT(shiftDown) + mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[idx + 1]); + ++idx; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + ROBIN_HOOD(NODISCARD) + size_t findIdx(Other const& key) const { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask == 0 ? 0 + : static_cast(std::distance( + mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); + } + + void cloneData(const Table& o) { + Cloner()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx{}; + InfoType info{}; + keyToIdx(keyval.getFirst(), &idx, &info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = idx + 1; + info += mInfoInc; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + } + + template + Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(first, last); + } + + Table(std::initializer_list initlist, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(initlist.begin(), initlist.end()); + } + + Table(Table&& o) noexcept + : WHash(std::move(static_cast(o))) + , WKeyEqual(std::move(static_cast(o))) + , DataPool(std::move(static_cast(o))) { + ROBIN_HOOD_TRACE(this) + if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + // set other's mask to 0 so its destructor won't do anything + o.init(); + } + } + + Table& operator=(Table&& o) noexcept { + ROBIN_HOOD_TRACE(this) + if (&o != this) { + if (o.mMask) { + // only move stuff if the other map actually has some data + destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + WHash::operator=(std::move(static_cast(o))); + WKeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + + o.init(); + + } else { + // nothing in the other map => just clear us. + clear(); + } + } + return *this; + } + + Table(const Table& o) + : WHash(static_cast(o)) + , WKeyEqual(static_cast(o)) + , DataPool(static_cast(o)) { + ROBIN_HOOD_TRACE(this) + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. 
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + Table& operator=(Table const& o) { + ROBIN_HOOD_TRACE(this) + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + init(); + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + // sentinel is set in cloneData + } + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(Table& o) { + ROBIN_HOOD_TRACE(this) + using std::swap; + swap(o, *this); + } + + // Clears all data, without resizing. + void clear() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + // clear everything, then set the sentinel again + uint8_t const z = 0; + std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~Table() { + ROBIN_HOOD_TRACE(this) + destroy(); + } + + // Checks if both tables contain the same entries. Order is irrelevant. 
+ bool operator==(const Table& other) const { + ROBIN_HOOD_TRACE(this) + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + if (!has(otherEntry)) { + return false; + } + } + + return true; + } + + bool operator!=(const Table& other) const { + ROBIN_HOOD_TRACE(this) + return !operator==(other); + } + + template + typename std::enable_if::value, Q&>::type operator[](const key_type& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + typename std::enable_if::value, Q&>::type operator[](key_type&& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + + template + std::pair emplace(Args&&... args) { + ROBIN_HOOD_TRACE(this) + Node n{*this, std::forward(args)...}; + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; + } + + std::pair insert(const value_type& keyval) { + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; + } + + std::pair insert(value_type&& keyval) { + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + return 1U == count(key); + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q&>::type at(key_type const& key) { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + // Returns a reference to the value found for key. 
+ // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q const&>::type at(key_type const& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + return end(); + } + return iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + const_iterator begin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cbegin(); + } + const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + if (empty()) { + return cend(); + } + return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + + iterator end() { + ROBIN_HOOD_TRACE(this) + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + const_iterator end() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cend(); + } + const_iterator cend() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + ROBIN_HOOD_TRACE(this) + // its safe to perform const cast here + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + ROBIN_HOOD_TRACE(this) + // we assume that pos always points to a valid entry, and not end(). 
+ auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // exactly the same as reserve(c). + void rehash(size_t c) { + // forces a reserve + reserve(c, true); + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. + void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } + } + + size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return mNumElements; + } + + size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(-1); + } + + ROBIN_HOOD(NODISCARD) bool empty() const noexcept { + ROBIN_HOOD_TRACE(this) + return 0 == mNumElements; + } + + float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return MaxLoadFactor100 / 100.0F; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(size()) / static_cast(mMask + 1); + } + + ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { + ROBIN_HOOD_TRACE(this) + return mMask; + } + + ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { + if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { + return maxElements * MaxLoadFactor100 / 100; + } + + // we might be a bit inprecise, but since maxElements is quite large that doesn't matter + return (maxElements / 100) * MaxLoadFactor100; + } + + ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { + // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load + // 64bit types. 
+ return numElements + sizeof(uint64_t); + } + + ROBIN_HOOD(NODISCARD) + size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { + auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); + return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); + } + + // calculation only allowed for 2^n values + ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { +#if ROBIN_HOOD(BITNESS) == 64 + return numElements * sizeof(Node) + calcNumBytesInfo(numElements); +#else + // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. + auto const ne = static_cast(numElements); + auto const s = static_cast(sizeof(Node)); + auto const infos = static_cast(calcNumBytesInfo(numElements)); + + auto const total64 = ne * s + infos; + auto const total = static_cast(total64); + + if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { + throwOverflowError(); + } + return total; +#endif + } + +private: + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + auto it = find(e.first); + return it != end() && it->second == e.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + return find(e) != end(); + } + + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + + // reserves space for at least the specified number of elements. + // only works if numBuckets if power of two + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) + + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + // resize operation: move stuff + initData(numBuckets); + if (oldMaxElementsWithBuffer > 1) { + for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { + if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. 
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } + } + } + + ROBIN_HOOD(NOINLINE) void throwOverflowError() const { +#if ROBIN_HOOD(HAS_EXCEPTIONS) + throw std::overflow_error("robin_hood::map overflow"); +#else + abort(); +#endif + } + + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { + mNumElements = 0; + mMask = max_elements - 1; + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); + + // malloc & zero mInfo. Faster than calloc everything. + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); + + // set sentinel + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; + + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. 
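
All of the insertion entry points above (operator[], emplace, try_emplace, insert_or_assign) funnel into the helper defined next, and they differ only in what happens when the key already exists. A short behavioral sketch of those public calls, using only members shown earlier:

#include "robin_hood.h"
#include <string>

int main() {
    robin_hood::unordered_flat_map<std::string, std::string> m;

    m.try_emplace("k", "first");      // key absent: inserted
    m.try_emplace("k", "second");     // key present: left untouched, m["k"] == "first"

    m.insert_or_assign("k", "third"); // key present: mapped value overwritten

    auto res = m.emplace("k", "fourth");
    // emplace also refuses to overwrite: res.second == false, m["k"] == "third"
    return res.second ? 1 : 0;
}
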
+ template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + nextWhileLess(&info, &idx); + + // while we potentially have a match + while (info == mInfo[idx]) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + // key already exists, do NOT insert. + // see http://en.cppreference.com/w/cpp/container/unordered_map/insert + return std::make_pair(idx, InsertionState::key_found); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + continue; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = info; + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + if (idx != insertion_idx) { + shiftUp(idx, insertion_idx); + } + // put at empty spot + mInfo[insertion_idx] = static_cast(insertion_info); + ++mNumElements; + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); + } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + + bool try_increase_info() { + ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements + << ", maxNumElementsAllowed=" + << calcMaxNumElementsAllowed(mMask + 1)) + if (mInfoInc <= 2) { + // need to be > 2 so that shift works (otherwise undefined behavior!) + return false; + } + // we got space left, try to make info smaller + mInfoInc = static_cast(mInfoInc >> 1U); + + // remove one bit of the hash, leaving more space for the distance info. + // This is extremely fast because we can operate on 8 bytes at once. + ++mInfoHashShift; + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + for (size_t i = 0; i < numElementsWithBuffer; i += 8) { + auto val = unaligned_load(mInfo + i); + val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); + std::memcpy(mInfo + i, &val, sizeof(val)); + } + // update sentinel, which might have been cleared out! + mInfo[numElementsWithBuffer] = 1; + + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + return true; + } + + // True if resize was possible, false otherwise + bool increase_size() { + // nothing allocated yet? just allocate InitialNumElements + if (0 == mMask) { + initData(InitialNumElements); + return true; + } + + auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + if (mNumElements < maxNumElementsAllowed && try_increase_info()) { + return true; + } + + ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" + << maxNumElementsAllowed << ", load=" + << (static_cast(mNumElements) * 100.0 / + (static_cast(mMask) + 1))) + + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case + // we have to rehash a few times + nextHashMultiplier(); + rehashPowerOfTwo(mMask + 1, true); + } else { + // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
+ rehashPowerOfTwo((mMask + 1) * 2, false); + } + return true; + } + + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); + } + + void destroy() { + if (0 == mMask) { + // don't deallocate! + return; + } + + Destroyer::value>{} + .nodesDoNotDeallocate(*this); + + // This protection against not deleting mMask shouldn't be needed as it's sufficiently + // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise + // reports a compile error: attempt to free a non-heap object 'fm' + // [-Werror=free-nonheap-object] + if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + } + + void init() noexcept { + mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); + mInfo = reinterpret_cast(&mMask); + mNumElements = 0; + mMask = 0; + mMaxNumElementsAllowed = 0; + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // members are sorted so no padding occurs + uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 + // 16 byte 56 if NodeAllocator +}; + +} // namespace detail + +// map + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_flat_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_node_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_map = + detail::Table) <= sizeof(size_t) * 6 && + std::is_nothrow_move_constructible>::value && + std::is_nothrow_move_assignable>::value, + MaxLoadFactor100, Key, T, Hash, KeyEqual>; + +// set + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_flat_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_node_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_set = detail::Table::value && + std::is_nothrow_move_assignable::value, + MaxLoadFactor100, Key, void, Hash, KeyEqual>; + +} // namespace robin_hood + +#endif From 5ca38bfc50bb9bbafa3c5d30d2969fd115349ccf Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:37:29 -0800 Subject: [PATCH 06/49] Change typedef to macro for map --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index 940a4a8..98b0395 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -13,7 +13,7 @@ #define BUSTOOLS_VERSION "0.42.0" -typedef robin_hood::unordered_flat_map u_map_; +#define u_map_ robin_hood::unordered_flat_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, From 2a74dc3e7a6bf4092b0a812b3fb5f09c446d831f Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:50:17 -0800 Subject: [PATCH 07/49] fix bustools inspect --- src/bustools_inspect.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/bustools_inspect.cpp b/src/bustools_inspect.cpp index 2808474..25694cf 100644 --- a/src/bustools_inspect.cpp +++ b/src/bustools_inspect.cpp @@ -100,7 +100,7 @@ void bustools_inspect(Bustools_opt &opt) { int64_t gt_records = 0; /* Frequency of number of targets per set, with multiplicity. */ - u_map_ freq_targetsPerSet; + std::unordered_map freq_targetsPerSet; /* Frequency of targets (for Good-Toulmin). */ std::vector freq_targets(numTargets, 0); @@ -273,7 +273,7 @@ void bustools_inspect(Bustools_opt &opt) { // Good-Toulmin for number of targets // Also number of targets detected uint64_t targetsDetected = 0; - u_map_ freq_freq_targets; + std::unordered_map freq_freq_targets; for (const auto &elt : freq_targets) { if (elt) { ++targetsDetected; From 41f631e7864816041476c549b0eeacae38895fb7 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:02:55 -0800 Subject: [PATCH 08/49] fix mash --- src/bustools_mash.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bustools_mash.cpp b/src/bustools_mash.cpp index 8faa65d..1df9702 100644 --- a/src/bustools_mash.cpp +++ b/src/bustools_mash.cpp @@ -105,8 +105,10 @@ void bustools_mash(const Bustools_opt &opt) for (int32_t i = 0; i < tid; i++) { - oh.ecs.push_back({i}); - ecmapinv.insert({{i}, i}); + std::vector tmp_vec; + tmp_vec.push_back(i); + oh.ecs.push_back(tmp_vec); + ecmapinv.insert({tmp_vec, i}); } std::vector> eids_per_file; From 07eeb6686c57f7a8e4ac9a0e24dd5827f404cb8a Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:06:17 -0800 Subject: [PATCH 09/49] fix undefined reference --- src/Common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 717ee24..5a81674 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -212,7 +212,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< } -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results std::vector u; // final list of transcripts From bf16198a9aafba5e93a3bef1d229723e83a8ba44 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:34:44 -0800 Subject: [PATCH 10/49] better hashing --- src/Common.hpp | 24 +++++- src/hash.cpp | 194 +++++++++++++++++++++++++++++++++++++++++++++++++ src/hash.hpp | 22 ++++++ 3 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 src/hash.cpp create mode 100644 src/hash.hpp diff --git a/src/Common.hpp b/src/Common.hpp index 98b0395..cab0932 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -10,6 +10,8 @@ #include #include #include "robin_hood.h" +#include 
"roaring.h" +#include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" @@ -160,14 +162,32 @@ struct SortedVectorHasher int i = 0; for (auto x : v) { - uint64_t t = std::hash{}(x); + uint64_t t; + MurmurHash3_x64_64(&x,sizeof(x), 0,&t); t = (x >> i) | (x << (64 - i)); r = r ^ t; - i = (i + 1) % 64; + i = (i+1)&63; } return r; } }; + +struct RoaringHasher { + size_t operator()(const Roaring& rr) const { + uint64_t r = 0; + int i=0; + for (auto x : rr) { + uint64_t t; + MurmurHash3_x64_64(&x, sizeof(x), 0, &t); + t = (x>>i) | (x<<(64-i)); + r ^= t; + i = (i+1)&63; // (i+1)%64 + } + return r; + } +}; +typedef u_map_ EcMapInv; + std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); diff --git a/src/hash.cpp b/src/hash.cpp new file mode 100644 index 0000000..b2d18e1 --- /dev/null +++ b/src/hash.cpp @@ -0,0 +1,194 @@ +#include +#include +#include "hash.hpp" + +uint64_t inline _rotl64(uint64_t value, int8_t amount) { + return ((value) << (amount)) | ((value) >> (64 - (amount))); +} + +uint32_t SuperFastHash (const char *data, int len) { + uint32_t hash = len, tmp; + int rem; + + if (len <= 0 || data == NULL) { return 0; } + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (; len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + + + + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +inline uint64_t getblock ( const uint64_t *p, int i ) { + return p[i]; +} + +//---------- +// Block mix - combine the key bits with the hash bits and scramble everything + +inline void bmix64 ( uint64_t& h1, uint64_t& h2, uint64_t& k1, uint64_t& k2, uint64_t& c1, uint64_t& c2 ) { + k1 *= c1; + k1 = _rotl64(k1,23); + k1 *= c2; + h1 ^= k1; + h1 += h2; + + h2 = _rotl64(h2,41); + + k2 *= c2; + k2 = _rotl64(k2,23); + k2 *= c1; + h2 ^= k2; + h2 += h1; + + h1 = h1*3+0x52dce729; + h2 = h2*3+0x38495ab5; + + c1 = c1*5+0x7b7d159c; + c2 = c2*5+0x6bce6396; +} + +//---------- +// Finalization mix - avalanches all bits to within 0.05% bias + +inline uint64_t fmix64 ( uint64_t k ) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccd; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53; + k ^= k >> 33; + + return k; +} + +void MurmurHash3_x64_128 ( const void *key, const int len, const uint32_t seed, void *out ) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; + + uint64_t h1 = 0x9368e53c2f6af274 ^ seed; + uint64_t h2 = 0x586dcd208f7cd3fd ^ seed; + + uint64_t c1 = 0x87c37b91114253d5; + uint64_t c2 = 0x4cf5ad432745937f; + + //---------- + // body + + const uint64_t *blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock(blocks,i*2+0); + uint64_t k2 = getblock(blocks,i*2+1); + + 
bmix64(h1,h2,k1,k2,c1,c2); + } + + //---------- + // tail + + const uint8_t *tail = (const uint8_t *)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) { + case 15: k2 ^= uint64_t(tail[14]) << 48; + case 14: k2 ^= uint64_t(tail[13]) << 40; + case 13: k2 ^= uint64_t(tail[12]) << 32; + case 12: k2 ^= uint64_t(tail[11]) << 24; + case 11: k2 ^= uint64_t(tail[10]) << 16; + case 10: k2 ^= uint64_t(tail[ 9]) << 8; + case 9: k2 ^= uint64_t(tail[ 8]) << 0; + + case 8: k1 ^= uint64_t(tail[ 7]) << 56; + case 7: k1 ^= uint64_t(tail[ 6]) << 48; + case 6: k1 ^= uint64_t(tail[ 5]) << 40; + case 5: k1 ^= uint64_t(tail[ 4]) << 32; + case 4: k1 ^= uint64_t(tail[ 3]) << 24; + case 3: k1 ^= uint64_t(tail[ 2]) << 16; + case 2: k1 ^= uint64_t(tail[ 1]) << 8; + case 1: k1 ^= uint64_t(tail[ 0]) << 0; + bmix64(h1,h2,k1,k2,c1,c2); + }; + + //---------- + // finalization + + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t *)out)[0] = h1; + ((uint64_t *)out)[1] = h2; +} + +//----------------------------------------------------------------------------- +// If we need a smaller hash value, it's faster to just use a portion of the +// 128-bit hash + +void MurmurHash3_x64_32 ( const void *key, int len, uint32_t seed, void *out ) { + uint32_t temp[4]; + + MurmurHash3_x64_128(key,len,seed,temp); + + *(uint32_t *)out = temp[0]; +} + +//---------- + +void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out ) { + uint64_t temp[2]; + + MurmurHash3_x64_128(key,len,seed,temp); + + *(uint64_t *)out = temp[0]; +} + +//----------------------------------------------------------------------------- + diff --git a/src/hash.hpp b/src/hash.hpp new file mode 100644 index 0000000..ab2c0d1 --- /dev/null +++ b/src/hash.hpp @@ -0,0 +1,22 @@ +#ifndef HASH_H +#define HASH_H + +#include /* Replace with if appropriate */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif + +uint32_t SuperFastHash (const char *data, int len); + +//void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out ); + +#endif + From 7dd1eb82105a364494946c7043e21fc4aaff051e Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 17:00:14 -0800 Subject: [PATCH 11/49] fix roaring --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index cab0932..a0985c3 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -10,7 +10,7 @@ #include #include #include "robin_hood.h" -#include "roaring.h" +#include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" From 19b3674900d9ee283877b243c391f42c62185168 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 21:01:24 -0800 Subject: [PATCH 12/49] ecmapinv: attempt to use bitmap --- src/Common.cpp | 86 ++++++++++++++++++------------------------ src/Common.hpp | 4 +- src/bustools_count.cpp | 8 ++-- 3 files changed, 42 insertions(+), 56 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 5a81674..9a95e54 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const 
std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -86,58 +86,35 @@ int32_t intersect_ecs(const std::vector &ecs, std::vector &u, return ecs[0]; // no work } - u.resize(0); - auto &v = ecmap[ecs[0]]; // copy - for (size_t i = 0; i< v.size(); i++) { - u.push_back(v[i]); - } - + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); + u = Roaring(ecmap[ecs[0]].size(), data); + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - const auto &v = ecmap[ecs[i]]; - - int j = 0; - int k = 0; - int l = 0; - int n = u.size(); - int m = v.size(); - // u and v are sorted, j,k,l = 0 - while (j < n && l < m) { - // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m - // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted - if (u[j] < v[l]) { - j++; - } else if (u[j] > v[l]) { - l++; - } else { - // match - if (k < j) { - std::swap(u[k], u[j]); - } - k++; - j++; - l++; - } - } - if (k < n) { - u.resize(k); - } + data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); + u &= Roaring(ecmap[ecs[i]].size(), data); } - if (u.empty()) { + if (u.isEmpty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -212,10 +189,10 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< } -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - std::vector u; // final list of transcripts + Roaring u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -245,11 +222,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.push_back(t); + u.add(t); } } - std::sort(u.begin(), u.end()); - u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -260,9 +235,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] 
u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } @@ -291,14 +272,13 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.push_back(t); + u.add(t); } } - if (u.empty()) { + if (u.isEmpty()) { return -1; } - std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -307,9 +287,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index a0985c3..778cc12 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -191,10 +191,10 @@ typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index b4a367f..b5e50d5 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -18,7 +18,7 @@ void bustools_count(Bustools_opt &opt) { // read and parse the equivalence class files - u_map_, int32_t, SortedVectorHasher> ecmapinv; + EcMapInv ecmapinv; std::vector> ecmap; u_map_ txnames; @@ -34,7 +34,8 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); + ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -85,8 +86,7 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - std::vector u; - u.reserve(100); + Roaring u; std::vector column_v; std::vector>> column_vp; // gene, {count, 
matrix type} if (!opt.count_collapse) { From 097f99181c0df5db2a5b7ffb3d99043a9d3f76fb Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 21:04:02 -0800 Subject: [PATCH 13/49] fix --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index 778cc12..4cc8596 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -191,7 +191,7 @@ typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); From 67ebc4632596ab21cd13c16619ec520273eb3858 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 6 Feb 2023 03:44:12 -0800 Subject: [PATCH 14/49] added -s count option to main --- src/bustools_main.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 384dc2f..991c099 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -328,7 +328,7 @@ void parse_ProgramOptions_capture(int argc, char **argv, Bustools_opt &opt) void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:g:e:t:md:"; + const char *opt_string = "o:g:e:t:md:s:"; int gene_flag = 0; int umigene_flag = 0; int em_flag = 0; @@ -348,6 +348,7 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"hist", no_argument, &hist_flag, 1}, {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, + {"split", required_argument, 0, 's'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -375,6 +376,9 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) case 'm': opt.count_gene_multimapping = true; break; + case 's': + opt.count_split = optarg; + break; default: break; } From 3f38e15e0393906e191d9ce7659fb849a39a833a Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 6 Feb 2023 05:32:05 -0800 Subject: [PATCH 15/49] fix split --- src/bustools_count.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index b5e50d5..2347cf4 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -24,8 +24,6 @@ void bustools_count(Bustools_opt &opt) { u_map_ txnames; auto txnames_split = txnames; // copy std::vector tx_split; - tx_split.reserve(txnames_split.size()); - for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -66,6 +64,10 @@ void bustools_count(Bustools_opt &opt) { // If we need to split matrix if (count_split) { parseTranscripts(opt.count_split, txnames_split); // subset of txnames + tx_split.reserve(txnames_split.size()); + for (auto x : txnames_split) { + if (txnames.count(x.first)) 
tx_split.push_back(txnames[x.first]); + } of_2.open(mtx_ofn_split_2); of_A.open(mtx_ofn_split_A); of_2 << ssHeader.str(); From 73bf6169d7ef12a49ea72c9894edc20a4cc6c239 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 7 Feb 2023 08:57:00 -0800 Subject: [PATCH 16/49] updates to make 3-matrix sparse --- src/bustools_count.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 2347cf4..e2a63a7 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -414,17 +414,17 @@ void bustools_count(Bustools_opt &opt) { else if (mtx_type == COUNT_SPLIT) val_2 += column_vp[j].second.first; else val_A += column_vp[j].second.first; } - col_map.insert({column_vp[i].first,val}); + if (!count_split || val != 0) col_map.insert({column_vp[i].first,val}); if (count_split) { - col_map_2.insert({column_vp[i].first,val_2}); - col_map_A.insert({column_vp[i].first,val_A}); + if (val_2 != 0) col_map_2.insert({column_vp[i].first,val_2}); + if (val_A != 0) col_map_A.insert({column_vp[i].first,val_A}); } cols.push_back(column_vp[i].first); if (count_split) { - if (val > 0) n_entries++; - if (val_2 > 0) n_entries_2++; - if (val_A > 0) n_entries_A++; + if (val != 0) n_entries++; + if (val_2 != 0) n_entries_2++; + if (val_A != 0) n_entries_A++; } else { n_entries++; } From 2ac7d9f4cd57cc3d951863c42d0c1a4377e38c8e Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 27 Feb 2023 23:25:35 -0800 Subject: [PATCH 17/49] mask length in bustools correct --- src/bustools_correct.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 588b744..eab438c 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,6 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; + uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -276,7 +277,7 @@ void bustools_split_correct(Bustools_opt &opt) bd = p[i]; - uint64_t b = bd.barcode; + uint64_t b = bd.barcode & len_mask; uint64_t bc12 = b & mask_12; uint64_t bc34 = (b >> (2 * len_12)) & mask_34; @@ -366,14 +367,14 @@ void bustools_split_correct(Bustools_opt &opt) if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); if (corrected_12_flag && corrected_34_flag) @@ -533,6 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; + uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -546,7 +548,7 @@ void bustools_correct(Bustools_opt &opt) for (size_t i = 0; i < rc; i++) { bd = p[i]; - auto it = wbc.find(bd.barcode); + auto it = wbc.find(bd.barcode & len_mask); if (it != wbc.end()) { stat_white++; @@ -554,7 +556,7 @@ void bustools_correct(Bustools_opt &opt) } else { - uint64_t b = bd.barcode; + uint64_t b = bd.barcode & len_mask; uint64_t lb = b & lower_mask; uint64_t ub = (b >> (2 * bc2)) & upper_mask; uint64_t lbc = 
0, ubc = 0; @@ -572,14 +574,14 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ub << (2 * bc2)) | lbc; if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); stat_corr++; } @@ -588,14 +590,14 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ubc << (2 * bc2)) | lb; if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); stat_corr++; } @@ -622,4 +624,4 @@ void bustools_correct(Bustools_opt &opt) delete[] p; p = nullptr; -} \ No newline at end of file +} From 3147f45c899fccb781410e5f688a0878ff903539 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 00:24:15 -0800 Subject: [PATCH 18/49] fix len_mask --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index eab438c..19f688d 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); From 3d3df8818e5fcea880290907d84e5c4ffd50bd55 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 00:39:29 -0800 Subject: [PATCH 19/49] fix len_mask again --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 19f688d..9c1c12b 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * 
sizeof(BUSData)); From e3b6ff4ea2a9f4e69ed4b446e98898ffb4ffe656 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 21:07:32 -0800 Subject: [PATCH 20/49] undo bitmap and unordered map --- src/Common.cpp | 84 +- src/Common.hpp | 7 +- src/bustools_count.cpp | 6 +- src/robin_hood.h | 2544 ---------------------------------------- 4 files changed, 55 insertions(+), 2586 deletions(-) delete mode 100644 src/robin_hood.h diff --git a/src/Common.cpp b/src/Common.cpp index 9a95e54..b4770bd 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -85,36 +85,59 @@ int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::ve if (ecs.size() == 1) { return ecs[0]; // no work } - - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); - u = Roaring(ecmap[ecs[0]].size(), data); + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); - u &= Roaring(ecmap[ecs[i]].size(), data); + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -192,7 +215,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - Roaring u; // final list of transcripts + std::vector u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -222,9 +245,11 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.add(t); + u.push_back(t); } } + std::sort(u.begin(), u.end()); + u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ 
-235,15 +260,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } @@ -272,13 +291,14 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.add(t); + u.push_back(t); } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } + std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -287,15 +307,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 4cc8596..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,13 +9,12 @@ #include #include #include -#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ robin_hood::unordered_flat_map +#define u_map_ std::unordered_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -186,12 +185,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_ EcMapInv; +typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e2a63a7..e0d125f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,8 +32,7 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); - ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); + ecmapinv.insert({ecmap[ec], ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -88,7 +87,8 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - Roaring u; + std::vector u; + u.reserve(100); std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) 
{ diff --git a/src/robin_hood.h b/src/robin_hood.h deleted file mode 100644 index 0af031f..0000000 --- a/src/robin_hood.h +++ /dev/null @@ -1,2544 +0,0 @@ -// ______ _____ ______ _________ -// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / -// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / -// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / -// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ -// _/_____/ -// -// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// https://github.com/martinus/robin-hood-hashing -// -// Licensed under the MIT License . -// SPDX-License-Identifier: MIT -// Copyright (c) 2018-2021 Martin Ankerl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#ifndef ROBIN_HOOD_H_INCLUDED -#define ROBIN_HOOD_H_INCLUDED - -// see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes - -#include -#include -#include -#include -#include -#include // only to support hash of smart pointers -#include -#include -#include -#include -#if __cplusplus >= 201703L -# include -#endif - -// #define ROBIN_HOOD_LOG_ENABLED -#ifdef ROBIN_HOOD_LOG_ENABLED -# include -# define ROBIN_HOOD_LOG(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_LOG(x) -#endif - -// #define ROBIN_HOOD_TRACE_ENABLED -#ifdef ROBIN_HOOD_TRACE_ENABLED -# include -# define ROBIN_HOOD_TRACE(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_TRACE(x) -#endif - -// #define ROBIN_HOOD_COUNT_ENABLED -#ifdef ROBIN_HOOD_COUNT_ENABLED -# include -# define ROBIN_HOOD_COUNT(x) ++counts().x; -namespace robin_hood { -struct Counts { - uint64_t shiftUp{}; - uint64_t shiftDown{}; -}; -inline std::ostream& operator<<(std::ostream& os, Counts const& c) { - return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; -} - -static Counts& counts() { - static Counts counts{}; - return counts; -} -} // namespace robin_hood -#else -# define ROBIN_HOOD_COUNT(x) -#endif - -// all non-argument macros should use this facility. 
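The PATCH 20 hunks above revert EcMapInv to a std::unordered_map keyed by sorted transcript-id vectors instead of Roaring bitmaps. A self-contained sketch of that keying (illustration only; VecHash is a simplified stand-in for SortedVectorHasher in src/Common.hpp):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <vector>

// Order-dependent combine over the sorted transcript ids; stand-in hasher only.
struct VecHash {
    std::size_t operator()(const std::vector<int32_t>& v) const {
        std::size_t r = 0;
        for (auto x : v) {
            r ^= std::hash<int32_t>{}(x) + 0x9e3779b97f4a7c15ULL + (r << 6) + (r >> 2);
        }
        return r;
    }
};

int main() {
    std::unordered_map<std::vector<int32_t>, int32_t, VecHash> ecmapinv;
    ecmapinv.emplace(std::vector<int32_t>{0, 3, 7}, 0);  // keys stay sorted, as in intersect_ecs
    auto it = ecmapinv.find(std::vector<int32_t>{0, 3, 7});
    std::printf("ec = %d\n", it == ecmapinv.end() ? -1 : it->second);
}
```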
See -// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ -#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() - -// mark unused members with this macro -#define ROBIN_HOOD_UNUSED(identifier) - -// bitness -#if SIZE_MAX == UINT32_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 -#elif SIZE_MAX == UINT64_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 -#else -# error Unsupported bitness -#endif - -// endianess -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#endif - -// inline -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) -#endif - -// exceptions -#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 -#endif - -// count leading/trailing bits -#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) -# ifdef _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -# else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll -# endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) -# endif -#endif - -// fallthrough -#ifndef __has_cpp_attribute // For backwards compatibility -# define __has_cpp_attribute(x) 0 -#endif -#if __has_cpp_attribute(clang::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() -#endif - -// likely/unlikely -#ifdef _MSC_VER -# define ROBIN_HOOD_LIKELY(condition) condition -# define ROBIN_HOOD_UNLIKELY(condition) condition -#else -# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) -# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) -#endif - -// detect if native wchar_t type is availiable in MSVC -#ifdef _MSC_VER -# ifdef _NATIVE_WCHAR_T_DEFINED -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -#endif - -// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr -#ifdef _MSC_VER -# if _MSC_VER <= 1900 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -#endif - -// workaround missing "is_trivially_copyable" in g++ < 5.0 -// See https://stackoverflow.com/a/31798726/48181 -#if defined(__GNUC__) && __GNUC__ < 5 -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) -#else -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value -#endif - -// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() -#endif - -namespace robin_hood { - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) -# define ROBIN_HOOD_STD std -#else - -// c++11 compatibility layer -namespace ROBIN_HOOD_STD { -template -struct alignment_of - : std::integral_constant::type)> {}; - -template -class integer_sequence { -public: - using value_type = T; - static_assert(std::is_integral::value, "not integral type"); - static constexpr std::size_t size() noexcept { - return sizeof...(Ints); - } -}; -template -using index_sequence = integer_sequence; - -namespace detail_ { -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); - - template - struct IntSeqCombiner; - - template - struct IntSeqCombiner, integer_sequence> { - using TResult = integer_sequence; - }; - - using TResult = - typename IntSeqCombiner::TResult, - typename IntSeqImpl::TResult>::TResult; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; -} // namespace detail_ - -template -using make_integer_sequence = typename detail_::IntSeqImpl::TResult; - -template -using make_index_sequence = make_integer_sequence; - -template -using index_sequence_for = make_index_sequence; - -} // namespace ROBIN_HOOD_STD - -#endif - -namespace detail { - -// make sure we static_cast to the correct type for hash_int -#if ROBIN_HOOD(BITNESS) == 64 -using SizeT = uint64_t; -#else -using SizeT = uint32_t; -#endif - -template -T rotr(T x, unsigned k) { - return (x >> k) | (x << (8U * sizeof(T) - k)); -} - -// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to -// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with -// care! 
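The ROBIN_HOOD(x) indirection defined above turns each per-platform flag into a function-like macro that reads like a value at the use site. A tiny stand-alone imitation (DEMO and DEMO_PRIVATE_DEFINITION_* are local names for illustration, not the header's macros):

```cpp
#include <cstdint>
#include <cstdio>

#define DEMO(x) DEMO_PRIVATE_DEFINITION_##x()
#if SIZE_MAX == UINT64_MAX
# define DEMO_PRIVATE_DEFINITION_BITNESS() 64
#else
# define DEMO_PRIVATE_DEFINITION_BITNESS() 32
#endif

int main() {
    std::printf("bitness: %d\n", DEMO(BITNESS)); // 64 on a 64-bit target
}
```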
-template -inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { - return reinterpret_cast(ptr); -} - -template -inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { - return reinterpret_cast(ptr); -} - -// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other -// inlinings more difficult. Throws are also generally the slow path. -template -[[noreturn]] ROBIN_HOOD(NOINLINE) -#if ROBIN_HOOD(HAS_EXCEPTIONS) - void doThrow(Args&&... args) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) - throw E(std::forward(args)...); -} -#else - void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { - abort(); -} -#endif - -template -T* assertNotNull(T* t, Args&&... args) { - if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { - doThrow(std::forward(args)...); - } - return t; -} - -template -inline T unaligned_load(void const* ptr) noexcept { - // using memcpy so we don't get into unaligned load problems. - // compiler should optimize this very well anyways. - T t; - std::memcpy(&t, ptr, sizeof(T)); - return t; -} - -// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, -// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a -// pointer. -template -class BulkPoolAllocator { -public: - BulkPoolAllocator() noexcept = default; - - // does not copy anything, just creates a new allocator. - BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept - : mHead(nullptr) - , mListForFree(nullptr) {} - - BulkPoolAllocator(BulkPoolAllocator&& o) noexcept - : mHead(o.mHead) - , mListForFree(o.mListForFree) { - o.mListForFree = nullptr; - o.mHead = nullptr; - } - - BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { - reset(); - mHead = o.mHead; - mListForFree = o.mListForFree; - o.mListForFree = nullptr; - o.mHead = nullptr; - return *this; - } - - BulkPoolAllocator& - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { - // does not do anything - return *this; - } - - ~BulkPoolAllocator() noexcept { - reset(); - } - - // Deallocates all allocated memory. - void reset() noexcept { - while (mListForFree) { - T* tmp = *mListForFree; - ROBIN_HOOD_LOG("std::free") - std::free(mListForFree); - mListForFree = reinterpret_cast_no_cast_align_warning(tmp); - } - mHead = nullptr; - } - - // allocates, but does NOT initialize. Use in-place new constructor, e.g. - // T* obj = pool.allocate(); - // ::new (static_cast(obj)) T(); - T* allocate() { - T* tmp = mHead; - if (!tmp) { - tmp = performAllocation(); - } - - mHead = *reinterpret_cast_no_cast_align_warning(tmp); - return tmp; - } - - // does not actually deallocate but puts it in store. - // make sure you have already called the destructor! e.g. with - // obj->~T(); - // pool.deallocate(obj); - void deallocate(T* obj) noexcept { - *reinterpret_cast_no_cast_align_warning(obj) = mHead; - mHead = obj; - } - - // Adds an already allocated block of memory to the allocator. This allocator is from now on - // responsible for freeing the data (with free()). If the provided data is not large enough to - // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
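detail::unaligned_load above is the memcpy idiom that lets the table read several info bytes at once (as try_increase_info does with 8-byte loads) without alignment undefined behaviour. Restated as a small sketch, for illustration only:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Local restatement of the unaligned_load idiom, not the header's code.
template <typename T>
T unaligned_load(const void* p) noexcept {
    T t;
    std::memcpy(&t, p, sizeof(T)); // compilers lower this to a plain load
    return t;
}

int main() {
    unsigned char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    auto v = unaligned_load<std::uint64_t>(buf + 1); // offset 1 is not 8-byte aligned
    std::printf("%llx\n", static_cast<unsigned long long>(v));
}
```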
- void addOrFree(void* ptr, const size_t numBytes) noexcept { - // calculate number of available elements in ptr - if (numBytes < ALIGNMENT + ALIGNED_SIZE) { - // not enough data for at least one element. Free and return. - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } else { - ROBIN_HOOD_LOG("add to buffer") - add(ptr, numBytes); - } - } - - void swap(BulkPoolAllocator& other) noexcept { - using std::swap; - swap(mHead, other.mHead); - swap(mListForFree, other.mListForFree); - } - -private: - // iterates the list of allocated memory to calculate how many to alloc next. - // Recalculating this each time saves us a size_t member. - // This ignores the fact that memory blocks might have been added manually with addOrFree. In - // practice, this should not matter much. - ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { - auto tmp = mListForFree; - size_t numAllocs = MinNumAllocs; - - while (numAllocs * 2 <= MaxNumAllocs && tmp) { - auto x = reinterpret_cast(tmp); - tmp = *x; - numAllocs *= 2; - } - - return numAllocs; - } - - // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). - void add(void* ptr, const size_t numBytes) noexcept { - const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; - - auto data = reinterpret_cast(ptr); - - // link free list - auto x = reinterpret_cast(data); - *x = mListForFree; - mListForFree = data; - - // create linked list for newly allocated data - auto* const headT = - reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); - - auto* const head = reinterpret_cast(headT); - - // Visual Studio compiler automatically unrolls this loop, which is pretty cool - for (size_t i = 0; i < numElements; ++i) { - *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = - head + (i + 1) * ALIGNED_SIZE; - } - - // last one points to 0 - *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = - mHead; - mHead = headT; - } - - // Called when no memory is available (mHead == 0). - // Don't inline this slow path. - ROBIN_HOOD(NOINLINE) T* performAllocation() { - size_t const numElementsToAlloc = calcNumElementsToAlloc(); - - // alloc new memory: [prev |T, T, ... T] - size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE - << " * " << numElementsToAlloc) - add(assertNotNull(std::malloc(bytes)), bytes); - return mHead; - } - - // enforce byte alignment of the T's -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) - static constexpr size_t ALIGNMENT = - (std::max)(std::alignment_of::value, std::alignment_of::value); -#else - static const size_t ALIGNMENT = - (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) - ? ROBIN_HOOD_STD::alignment_of::value - : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround -#endif - - static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; - - static_assert(MinNumAllocs >= 1, "MinNumAllocs"); - static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); - static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); - static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); - static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); - - T* mHead{nullptr}; - T** mListForFree{nullptr}; -}; - -template -struct NodeAllocator; - -// dummy allocator that does nothing -template -struct NodeAllocator { - - // we are not using the data, so just free it. 
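BulkPoolAllocator's allocate() and deallocate() shown above thread freed nodes into an intrusive free list and hand them back before calling malloc again. A minimal sketch of that idea (not the header's implementation; block size must be at least sizeof(void*)):

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>

struct FreeList {
    void* head = nullptr;
    void* take(std::size_t sz) {
        if (head == nullptr) return std::malloc(sz); // pool empty: fall back to malloc
        void* p = head;
        head = *static_cast<void**>(p);              // pop: the block's first bytes hold the next pointer
        return p;
    }
    void give(void* p) {
        *static_cast<void**>(p) = head;              // push without calling free
        head = p;
    }
};

int main() {
    FreeList pool;
    void* a = pool.take(64);
    pool.give(a);
    void* b = pool.take(64);                         // hands back the same block
    std::printf("reused: %d\n", static_cast<int>(a == b));
    std::free(b);
}
```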
- void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } -}; - -template -struct NodeAllocator : public BulkPoolAllocator {}; - -// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making -// my own here. -namespace swappable { -#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) -using std::swap; -template -struct nothrow { - static const bool value = noexcept(swap(std::declval(), std::declval())); -}; -#else -template -struct nothrow { - static const bool value = std::is_nothrow_swappable::value; -}; -#endif -} // namespace swappable - -} // namespace detail - -struct is_transparent_tag {}; - -// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, -// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is -// also tested. -template -struct pair { - using first_type = T1; - using second_type = T2; - - template ::value && - std::is_default_constructible::value>::type> - constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) - : first() - , second() {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair const& o) noexcept( - noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) - : first(o.first) - , second(o.second) {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair&& o) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(o.first)) - , second(std::move(o.second)) {} - - constexpr pair(T1&& a, T2&& b) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(a)) - , second(std::move(b)) {} - - template - constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( - std::declval()))) && noexcept(T2(std::forward(std::declval())))) - : first(std::forward(a)) - , second(std::forward(b)) {} - - template - // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" - // if this constructor is constexpr -#if !ROBIN_HOOD(BROKEN_CONSTEXPR) - constexpr -#endif - pair(std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple - b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) - : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) { - } - - // constructor called from the std::piecewise_construct_t ctor - template - pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( - noexcept(T1(std::forward(std::get( - std::declval&>()))...)) && noexcept(T2(std:: - forward(std::get( - std::declval&>()))...))) - : first(std::forward(std::get(a))...) - , second(std::forward(std::get(b))...) { - // make visual studio compiler happy about warning about unused a & b. - // Visual studio's pair implementation disables warning 4100. 
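The comment above gives the reason for the custom robin_hood::pair: with trivial members it remains trivially copyable, so the table may legally relocate nodes with std::memcpy. A one-line check (assumes the vendored robin_hood.h is on the include path):

```cpp
#include <type_traits>
#include "robin_hood.h"

static_assert(std::is_trivially_copyable<robin_hood::pair<int, int>>::value,
              "robin_hood::pair of trivial types can be moved with std::memcpy");

int main() { return 0; }
```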
- (void)a; - (void)b; - } - - void swap(pair& o) noexcept((detail::swappable::nothrow::value) && - (detail::swappable::nothrow::value)) { - using std::swap; - swap(first, o.first); - swap(second, o.second); - } - - T1 first; // NOLINT(misc-non-private-member-variables-in-classes) - T2 second; // NOLINT(misc-non-private-member-variables-in-classes) -}; - -template -inline void swap(pair& a, pair& b) noexcept( - noexcept(std::declval&>().swap(std::declval&>()))) { - a.swap(b); -} - -template -inline constexpr bool operator==(pair const& x, pair const& y) { - return (x.first == y.first) && (x.second == y.second); -} -template -inline constexpr bool operator!=(pair const& x, pair const& y) { - return !(x == y); -} -template -inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( - std::declval() < std::declval()) && noexcept(std::declval() < - std::declval())) { - return x.first < y.first || (!(y.first < x.first) && x.second < y.second); -} -template -inline constexpr bool operator>(pair const& x, pair const& y) { - return y < x; -} -template -inline constexpr bool operator<=(pair const& x, pair const& y) { - return !(x > y); -} -template -inline constexpr bool operator>=(pair const& x, pair const& y) { - return !(x < y); -} - -inline size_t hash_bytes(void const* ptr, size_t len) noexcept { - static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - static constexpr uint64_t seed = UINT64_C(0xe17a1465); - static constexpr unsigned int r = 47; - - auto const* const data64 = static_cast(ptr); - uint64_t h = seed ^ (len * m); - - size_t const n_blocks = len / 8; - for (size_t i = 0; i < n_blocks; ++i) { - auto k = detail::unaligned_load(data64 + i); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - auto const* const data8 = reinterpret_cast(data64 + n_blocks); - switch (len & 7U) { - case 7: - h ^= static_cast(data8[6]) << 48U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 6: - h ^= static_cast(data8[5]) << 40U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 5: - h ^= static_cast(data8[4]) << 32U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 4: - h ^= static_cast(data8[3]) << 24U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 3: - h ^= static_cast(data8[2]) << 16U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 2: - h ^= static_cast(data8[1]) << 8U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 1: - h ^= static_cast(data8[0]); - h *= m; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - default: - break; - } - - h ^= h >> r; - - // not doing the final step here, because this will be done by keyToIdx anyways - // h *= m; - // h ^= h >> r; - return static_cast(h); -} - -inline size_t hash_int(uint64_t x) noexcept { - // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, - // and doesn't need any special 128bit operations. - x ^= x >> 33U; - x *= UINT64_C(0xff51afd7ed558ccd); - x ^= x >> 33U; - - // not doing the final step here, because this will be done by keyToIdx anyways - // x *= UINT64_C(0xc4ceb9fe1a85ec53); - // x ^= x >> 33U; - return static_cast(x); -} - -// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
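hash_int above is the murmurhash3 64-bit finalizer with the last multiply-and-shift deliberately dropped, because keyToIdx later applies its own multiply and shift; hash_bytes is a murmur-style block hash over raw bytes. A small, hedged illustration of that mixing step (the constant is copied from the code above, everything else is mine, and the bucket selection here is simplified to a plain mask):

    #include <cstdint>
    #include <cstdio>

    // Same mixing as robin_hood's hash_int (murmurhash3 finalizer; the final
    // step is deliberately omitted because keyToIdx applies another multiply+shift).
    static inline uint64_t mix_int(uint64_t x) {
        x ^= x >> 33U;
        x *= UINT64_C(0xff51afd7ed558ccd);
        x ^= x >> 33U;
        return x;
    }

    int main() {
        // Sequential keys land in very different buckets after mixing.
        const uint64_t mask = 0xFF; // pretend the table has 256 slots
        for (uint64_t k = 0; k < 4; ++k) {
            std::printf("key %llu -> bucket %llu\n",
                        (unsigned long long)k,
                        (unsigned long long)(mix_int(k) & mask));
        }
    }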
-template -struct hash : public std::hash { - size_t operator()(T const& obj) const - noexcept(noexcept(std::declval>().operator()(std::declval()))) { - // call base hash - auto result = std::hash::operator()(obj); - // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); - } -}; - -template -struct hash> { - size_t operator()(std::basic_string const& str) const noexcept { - return hash_bytes(str.data(), sizeof(CharT) * str.size()); - } -}; - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -template -struct hash> { - size_t operator()(std::basic_string_view const& sv) const noexcept { - return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); - } -}; -#endif - -template -struct hash { - size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); - } -}; - -template -struct hash> { - size_t operator()(std::unique_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash> { - size_t operator()(std::shared_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash::value>::type> { - size_t operator()(Enum e) const noexcept { - using Underlying = typename std::underlying_type::type; - return hash{}(static_cast(e)); - } -}; - -#define ROBIN_HOOD_HASH_INT(T) \ - template <> \ - struct hash { \ - size_t operator()(T const& obj) const noexcept { \ - return hash_int(static_cast(obj)); \ - } \ - } - -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuseless-cast" -#endif -// see https://en.cppreference.com/w/cpp/utility/hash -ROBIN_HOOD_HASH_INT(bool); -ROBIN_HOOD_HASH_INT(char); -ROBIN_HOOD_HASH_INT(signed char); -ROBIN_HOOD_HASH_INT(unsigned char); -ROBIN_HOOD_HASH_INT(char16_t); -ROBIN_HOOD_HASH_INT(char32_t); -#if ROBIN_HOOD(HAS_NATIVE_WCHART) -ROBIN_HOOD_HASH_INT(wchar_t); -#endif -ROBIN_HOOD_HASH_INT(short); -ROBIN_HOOD_HASH_INT(unsigned short); -ROBIN_HOOD_HASH_INT(int); -ROBIN_HOOD_HASH_INT(unsigned int); -ROBIN_HOOD_HASH_INT(long); -ROBIN_HOOD_HASH_INT(long long); -ROBIN_HOOD_HASH_INT(unsigned long); -ROBIN_HOOD_HASH_INT(unsigned long long); -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif -namespace detail { - -template -struct void_type { - using type = void; -}; - -template -struct has_is_transparent : public std::false_type {}; - -template -struct has_is_transparent::type> - : public std::true_type {}; - -// using wrapper classes for hash and key_equal prevents the diamond problem when the same type -// is used. see https://stackoverflow.com/a/28771920/48181 -template -struct WrapHash : public T { - WrapHash() = default; - explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -template -struct WrapKeyEqual : public T { - WrapKeyEqual() = default; - explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -// A highly optimized hashmap implementation, using the Robin Hood algorithm. -// -// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but -// be about 2x faster in most cases and require much less allocations. -// -// This implementation uses the following memory layout: -// -// [Node, Node, ... Node | info, info, ... infoSentinel ] -// -// * Node: either a DataNode that directly has the std::pair as member, -// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use -// depends on how fast the swap() operation is. Heuristically, this is automatically choosen -// based on sizeof(). there are always 2^n Nodes. -// -// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. -// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the -// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it -// actually belongs to the previous position and was pushed out because that place is already -// taken. -// -// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the -// need for a idx variable. -// -// According to STL, order of templates has effect on throughput. That's why I've moved the -// boolean to the front. -// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ -template -class Table - : public WrapHash, - public WrapKeyEqual, - detail::NodeAllocator< - typename std::conditional< - std::is_void::value, Key, - robin_hood::pair::type, T>>::type, - 4, 16384, IsFlat> { -public: - static constexpr bool is_flat = IsFlat; - static constexpr bool is_map = !std::is_void::value; - static constexpr bool is_set = !is_map; - static constexpr bool is_transparent = - has_is_transparent::value && has_is_transparent::value; - - using key_type = Key; - using mapped_type = T; - using value_type = typename std::conditional< - is_set, Key, - robin_hood::pair::type, T>>::type; - using size_type = size_t; - using hasher = Hash; - using key_equal = KeyEqual; - using Self = Table; - -private: - static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, - "MaxLoadFactor100 needs to be >10 && < 100"); - - using WHash = WrapHash; - using WKeyEqual = WrapKeyEqual; - - // configuration defaults - - // make sure we have 8 elements, needed to quickly rehash mInfo - static constexpr size_t InitialNumElements = sizeof(uint64_t); - static constexpr uint32_t InitialInfoNumBits = 5; - static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr size_t InfoMask = InitialInfoInc - 1U; - static constexpr uint8_t InitialInfoHashShift = 0; - using DataPool = detail::NodeAllocator; - - // type needs to be wider than uint8_t. - using InfoType = uint32_t; - - // DataNode //////////////////////////////////////////////////////// - - // Primary template for the data node. We have special implementations for small and big - // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these - // on the heap so swap merely swaps a pointer. - template - class DataNode {}; - - // Small: just allocate on the stack. - template - class DataNode final { - public: - template - explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( - noexcept(value_type(std::forward(args)...))) - : mData(std::forward(args)...) 
{} - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( - std::is_nothrow_move_constructible::value) - : mData(std::move(n.mData)) {} - - // doesn't do anything - void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} - void destroyDoNotDeallocate() noexcept {} - - value_type const* operator->() const noexcept { - return &mData; - } - value_type* operator->() noexcept { - return &mData; - } - - const value_type& operator*() const noexcept { - return mData; - } - - value_type& operator*() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData.second; - } - - void swap(DataNode& o) noexcept( - noexcept(std::declval().swap(std::declval()))) { - mData.swap(o.mData); - } - - private: - value_type mData; - }; - - // big object: allocate on heap. - template - class DataNode { - public: - template - explicit DataNode(M& map, Args&&... args) - : mData(map.allocate()) { - ::new (static_cast(mData)) value_type(std::forward(args)...); - } - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept - : mData(std::move(n.mData)) {} - - void destroy(M& map) noexcept { - // don't deallocate, just put it into list of datapool. - mData->~value_type(); - map.deallocate(mData); - } - - void destroyDoNotDeallocate() noexcept { - mData->~value_type(); - } - - value_type const* operator->() const noexcept { - return mData; - } - - value_type* operator->() noexcept { - return mData; - } - - const value_type& operator*() const { - return *mData; - } - - value_type& operator*() { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData->second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData->second; - } - - void swap(DataNode& o) noexcept { - using std::swap; - swap(mData, o.mData); - } - - private: - value_type* mData; - }; - - using Node = DataNode; - - // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) - ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { - return n.getFirst(); - } - - // in case we have void mapped_type, we are not using a pair, thus we just route k through. - // No need to disable this because it's just not used if not applicable. 
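The getFirst/getFirstConst overloads above exist because one Table type backs both maps and sets: when the mapped type T is void the stored value_type is just Key, otherwise it is a key/value pair (robin_hood uses its own trivially copyable pair so flat nodes can be copied byte-wise). A hedged sketch of that std::conditional selection, with std::pair standing in for robin_hood::pair and illustrative names:

    #include <type_traits>
    #include <utility>

    template <typename Key, typename T>
    struct StoredValue {
        static constexpr bool is_set = std::is_void<T>::value;
        // void mapped type -> store the key itself; otherwise store a key/value pair.
        using type = typename std::conditional<is_set, Key, std::pair<Key, T>>::type;
    };

    // "set" flavour stores the key, "map" flavour stores a pair.
    static_assert(std::is_same<StoredValue<int, void>::type, int>::value,
                  "set stores Key directly");
    static_assert(std::is_same<StoredValue<int, double>::type, std::pair<int, double>>::value,
                  "map stores a key/value pair");

    int main() {}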
- ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { - return k; - } - - // in case we have non-void mapped_type, we have a standard robin_hood::pair - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, key_type const&>::type - getFirstConst(value_type const& vt) const noexcept { - return vt.first; - } - - // Cloner ////////////////////////////////////////////////////////// - - template - struct Cloner; - - // fast path: Just copy data, without allocating anything. - template - struct Cloner { - void operator()(M const& source, M& target) const { - auto const* const src = reinterpret_cast(source.mKeyVals); - auto* tgt = reinterpret_cast(target.mKeyVals); - auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); - std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); - } - }; - - template - struct Cloner { - void operator()(M const& s, M& t) const { - auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); - std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); - - for (size_t i = 0; i < numElementsWithBuffer; ++i) { - if (t.mInfo[i]) { - ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); - } - } - } - }; - - // Destroyer /////////////////////////////////////////////////////// - - template - struct Destroyer {}; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - } - }; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroy(m); - n.~Node(); - } - } - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroyDoNotDeallocate(); - n.~Node(); - } - } - } - }; - - // Iter //////////////////////////////////////////////////////////// - - struct fast_forward_tag {}; - - // generic iterator for both const_iterator and iterator. - template - // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) - class Iter { - private: - using NodePtr = typename std::conditional::type; - - public: - using difference_type = std::ptrdiff_t; - using value_type = typename Self::value_type; - using reference = typename std::conditional::type; - using pointer = typename std::conditional::type; - using iterator_category = std::forward_iterator_tag; - - // default constructed iterator can be compared to itself, but WON'T return true when - // compared to end(). - Iter() = default; - - // Rule of zero: nothing specified. The conversion constructor is only enabled for - // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. - - // Conversion constructor from iterator to const_iterator. 
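The Cloner specialization above copies the entire node-plus-info allocation with a single std::copy over raw bytes, which is only valid because the flat map's nodes are trivially copyable; the other specialization falls back to per-element placement-new. A toy illustration of that fast path on a plain array (the names are mine):

    #include <cstdio>
    #include <cstring>
    #include <type_traits>

    struct FlatNode { int key; float value; };          // trivially copyable payload
    static_assert(std::is_trivially_copyable<FlatNode>::value,
                  "byte-wise copy is only valid for trivially copyable nodes");

    int main() {
        FlatNode src[4] = {{1, 1.0f}, {2, 2.0f}, {3, 3.0f}, {4, 4.0f}};
        FlatNode dst[4];
        std::memcpy(dst, src, sizeof(src));             // the "fast path": one bulk copy
        std::printf("%d %.1f\n", dst[2].key, dst[2].value);
    }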
- template ::type> - // NOLINTNEXTLINE(hicpp-explicit-conversions) - Iter(Iter const& other) noexcept - : mKeyVals(other.mKeyVals) - , mInfo(other.mInfo) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr, - fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) { - fastForward(); - } - - template ::type> - Iter& operator=(Iter const& other) noexcept { - mKeyVals = other.mKeyVals; - mInfo = other.mInfo; - return *this; - } - - // prefix increment. Undefined behavior if we are at end()! - Iter& operator++() noexcept { - mInfo++; - mKeyVals++; - fastForward(); - return *this; - } - - Iter operator++(int) noexcept { - Iter tmp = *this; - ++(*this); - return tmp; - } - - reference operator*() const { - return **mKeyVals; - } - - pointer operator->() const { - return &**mKeyVals; - } - - template - bool operator==(Iter const& o) const noexcept { - return mKeyVals == o.mKeyVals; - } - - template - bool operator!=(Iter const& o) const noexcept { - return mKeyVals != o.mKeyVals; - } - - private: - // fast forward to the next non-free info byte - // I've tried a few variants that don't depend on intrinsics, but unfortunately they are - // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. - void fastForward() noexcept { - size_t n = 0; - while (0U == (n = detail::unaligned_load(mInfo))) { - mInfo += sizeof(size_t); - mKeyVals += sizeof(size_t); - } -#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) - // we know for certain that within the next 8 bytes we'll find a non-zero one. - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 4; - mKeyVals += 4; - } - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 2; - mKeyVals += 2; - } - if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { - mInfo += 1; - mKeyVals += 1; - } -#else -# if ROBIN_HOOD(LITTLE_ENDIAN) - auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; -# else - auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; -# endif - mInfo += inc; - mKeyVals += inc; -#endif - } - - friend class Table; - NodePtr mKeyVals{nullptr}; - uint8_t const* mInfo{nullptr}; - }; - - //////////////////////////////////////////////////////////////////// - - // highly performance relevant code. - // Lower bits are used for indexing into the array (2^n size) - // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. - template - void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // In addition to whatever hash is used, add another mul & shift so we get better hashing. - // This serves as a bad hash prevention, if the given data is - // badly mixed. - auto h = static_cast(WHash::operator()(key)); - - h *= mHashMultiplier; - h ^= h >> 33U; - - // the lower InitialInfoNumBits are reserved for info. - *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); - *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; - } - - // forwards the index by one, wrapping around at the end - void next(InfoType* info, size_t* idx) const noexcept { - *idx = *idx + 1; - *info += mInfoInc; - } - - void nextWhileLess(InfoType* info, size_t* idx) const noexcept { - // unrolling this by hand did not bring any speedups. - while (*info < mInfo[*idx]) { - next(info, idx); - } - } - - // Shift everything up by one element. Tries to move stuff around. 
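keyToIdx above multiplies the user hash by mHashMultiplier, folds the high bits down, and then splits the result in two: the low InitialInfoNumBits seed the per-slot info byte and the remaining bits, masked by mMask, select the bucket. A simplified stand-alone version of that split, using the same constants as the snippet above (InitialInfoNumBits = 5, InitialInfoInc = 32):

    #include <cstdint>
    #include <cstdio>

    struct IdxInfo { uint64_t idx; uint32_t info; };

    IdxInfo key_to_idx(uint64_t hash, uint64_t hashMultiplier, uint64_t mask,
                       uint32_t infoInc, uint32_t infoHashShift) {
        const uint32_t InitialInfoNumBits = 5;
        const uint64_t InfoMask = (1U << InitialInfoNumBits) - 1U;     // low 5 bits

        uint64_t h = hash * hashMultiplier;
        h ^= h >> 33U;

        IdxInfo r;
        r.info = infoInc + (uint32_t)((h & InfoMask) >> infoHashShift); // seeds the distance counter
        r.idx  = (h >> InitialInfoNumBits) & mask;                      // bucket index
        return r;
    }

    int main() {
        IdxInfo r = key_to_idx(/*hash=*/12345, UINT64_C(0xc4ceb9fe1a85ec53),
                               /*mask=*/255, /*infoInc=*/32, /*infoHashShift=*/0);
        std::printf("idx=%llu info=%u\n", (unsigned long long)r.idx, r.info);
    }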
- void - shiftUp(size_t startIdx, - size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { - auto idx = startIdx; - ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); - while (--idx != insertion_idx) { - mKeyVals[idx] = std::move(mKeyVals[idx - 1]); - } - - idx = startIdx; - while (idx != insertion_idx) { - ROBIN_HOOD_COUNT(shiftUp) - mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); - if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - --idx; - } - } - - void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { - // until we find one that is either empty or has zero offset. - // TODO(martinus) we don't need to move everything, just the last one for the same - // bucket. - mKeyVals[idx].destroy(*this); - - // until we find one that is either empty or has zero offset. - while (mInfo[idx + 1] >= 2 * mInfoInc) { - ROBIN_HOOD_COUNT(shiftDown) - mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); - mKeyVals[idx] = std::move(mKeyVals[idx + 1]); - ++idx; - } - - mInfo[idx] = 0; - // don't destroy, we've moved it - // mKeyVals[idx].destroy(*this); - mKeyVals[idx].~Node(); - } - - // copy of find(), except that it returns iterator instead of const_iterator. - template - ROBIN_HOOD(NODISCARD) - size_t findIdx(Other const& key) const { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - do { - // unrolling this twice gives a bit of a speedup. More unrolling did not help. - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found! - return mMask == 0 ? 0 - : static_cast(std::distance( - mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); - } - - void cloneData(const Table& o) { - Cloner()(o, *this); - } - - // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return True on success, false if something went wrong - void insert_move(Node&& keyval) { - // we don't retry, fail if overflowing - // don't need to check max num elements - if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); - } - - size_t idx{}; - InfoType info{}; - keyToIdx(keyval.getFirst(), &idx, &info); - - // skip forward. Use <= because we are certain that the element is not there. - while (info <= mInfo[idx]) { - idx = idx + 1; - info += mInfoInc; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = static_cast(info); - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(std::move(keyval)); - } else { - shiftUp(idx, insertion_idx); - l = std::move(keyval); - } - - // put at empty spot - mInfo[insertion_idx] = insertion_info; - - ++mNumElements; - } - -public: - using iterator = Iter; - using const_iterator = Iter; - - Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) - : WHash() - , WKeyEqual() { - ROBIN_HOOD_TRACE(this) - } - - // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
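shiftDown above is backward-shift deletion: after the erased node is destroyed, every following entry whose info byte shows it was displaced (info >= 2 * mInfoInc) slides one slot back and its info is decremented, so probe chains stay dense and no tombstones are needed. A toy version on plain arrays, assuming info == 0 means empty and each step of mInfoInc means one slot further from home:

    #include <cstdio>
    #include <vector>

    // Toy backward-shift delete: info[i] == 0 means empty, otherwise
    // info[i] = INFO_INC * (1 + distance from the slot the key hashed to).
    static const unsigned INFO_INC = 32;

    void backward_shift_erase(std::vector<int>& keys, std::vector<unsigned>& info, size_t idx) {
        while (info[idx + 1] >= 2 * INFO_INC) {   // next entry was displaced: pull it back
            info[idx] = info[idx + 1] - INFO_INC;
            keys[idx] = keys[idx + 1];
            ++idx;
        }
        info[idx] = 0;                            // the chain ends here; slot is now empty
    }

    int main() {
        // slots:               0    1    2    3   4 (sentinel)
        std::vector<int>      keys{10,  11,  12,   0,  0};
        std::vector<unsigned> info{32,  64,  96,   0,  1};  // 11 and 12 were pushed out of slot 0
        backward_shift_erase(keys, info, 0);                // erase the key in slot 0
        for (size_t i = 0; i + 1 < keys.size(); ++i)
            std::printf("slot %zu: key=%d info=%u\n", i, keys[i], info[i]);
    }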
- // This tremendously speeds up ctor & dtor of a map that never receives an element. The - // penalty is payed at the first insert, and not before. Lookup of this empty map works - // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the - // standard, but we can ignore it. - explicit Table( - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - } - - template - Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, - const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(first, last); - } - - Table(std::initializer_list initlist, - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(initlist.begin(), initlist.end()); - } - - Table(Table&& o) noexcept - : WHash(std::move(static_cast(o))) - , WKeyEqual(std::move(static_cast(o))) - , DataPool(std::move(static_cast(o))) { - ROBIN_HOOD_TRACE(this) - if (o.mMask) { - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - // set other's mask to 0 so its destructor won't do anything - o.init(); - } - } - - Table& operator=(Table&& o) noexcept { - ROBIN_HOOD_TRACE(this) - if (&o != this) { - if (o.mMask) { - // only move stuff if the other map actually has some data - destroy(); - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - WHash::operator=(std::move(static_cast(o))); - WKeyEqual::operator=(std::move(static_cast(o))); - DataPool::operator=(std::move(static_cast(o))); - - o.init(); - - } else { - // nothing in the other map => just clear us. - clear(); - } - } - return *this; - } - - Table(const Table& o) - : WHash(static_cast(o)) - , WKeyEqual(static_cast(o)) - , DataPool(static_cast(o)) { - ROBIN_HOOD_TRACE(this) - if (!o.empty()) { - // not empty: create an exact copy. it is also possible to just iterate through all - // elements and insert them, but copying is probably faster. - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mHashMultiplier = o.mHashMultiplier; - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - // no need for calloc because clonData does memcpy - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - } - } - - // Creates a copy of the given map. Copy constructor of each entry is used. 
- // Not sure why clang-tidy thinks this doesn't handle self assignment, it does - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - Table& operator=(Table const& o) { - ROBIN_HOOD_TRACE(this) - if (&o == this) { - // prevent assigning of itself - return *this; - } - - // we keep using the old allocator and not assign the new one, because we want to keep - // the memory available. when it is the same size. - if (o.empty()) { - if (0 == mMask) { - // nothing to do, we are empty too - return *this; - } - - // not empty: destroy what we have there - // clear also resets mInfo to 0, that's sometimes not necessary. - destroy(); - init(); - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - - return *this; - } - - // clean up old stuff - Destroyer::value>{}.nodes(*this); - - if (mMask != o.mMask) { - // no luck: we don't have the same array size allocated, so we need to realloc. - if (0 != mMask) { - // only deallocate if we actually have data! - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - - // no need for calloc here because cloneData performs a memcpy. - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - // sentinel is set in cloneData - } - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - mHashMultiplier = o.mHashMultiplier; - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - - return *this; - } - - // Swaps everything between the two maps. - void swap(Table& o) { - ROBIN_HOOD_TRACE(this) - using std::swap; - swap(o, *this); - } - - // Clears all data, without resizing. - void clear() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - // don't do anything! also important because we don't want to write to - // DummyInfoByte::b, even though we would just write 0 to it. - return; - } - - Destroyer::value>{}.nodes(*this); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - // clear everything, then set the sentinel again - uint8_t const z = 0; - std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // Destroys the map and all it's contents. - ~Table() { - ROBIN_HOOD_TRACE(this) - destroy(); - } - - // Checks if both tables contain the same entries. Order is irrelevant. 
- bool operator==(const Table& other) const { - ROBIN_HOOD_TRACE(this) - if (other.size() != size()) { - return false; - } - for (auto const& otherEntry : other) { - if (!has(otherEntry)) { - return false; - } - } - - return true; - } - - bool operator!=(const Table& other) const { - ROBIN_HOOD_TRACE(this) - return !operator==(other); - } - - template - typename std::enable_if::value, Q&>::type operator[](const key_type& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - typename std::enable_if::value, Q&>::type operator[](key_type&& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - void insert(Iter first, Iter last) { - for (; first != last; ++first) { - // value_type ctor needed because this might be called with std::pair's - insert(value_type(*first)); - } - } - - void insert(std::initializer_list ilist) { - for (auto&& vt : ilist) { - insert(std::move(vt)); - } - } - - template - std::pair emplace(Args&&... args) { - ROBIN_HOOD_TRACE(this) - Node n{*this, std::forward(args)...}; - auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); - switch (idxAndState.second) { - case InsertionState::key_found: - n.destroy(*this); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = std::move(n); - break; - - case InsertionState::overflow_error: - n.destroy(*this); - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - iterator emplace_hint(const_iterator position, Args&&... args) { - (void)position; - return emplace(std::forward(args)...).first; - } - - template - std::pair try_emplace(const key_type& key, Args&&... args) { - return try_emplace_impl(key, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& key, Args&&... args) { - return try_emplace_impl(std::move(key), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { - (void)hint; - return try_emplace_impl(key, std::forward(args)...).first; - } - - template - iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { - (void)hint; - return try_emplace_impl(std::move(key), std::forward(args)...).first; - } - - template - std::pair insert_or_assign(const key_type& key, Mapped&& obj) { - return insertOrAssignImpl(key, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& key, Mapped&& obj) { - return insertOrAssignImpl(std::move(key), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(key, std::forward(obj)).first; - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(std::move(key), std::forward(obj)).first; - } - - std::pair insert(const value_type& keyval) { - ROBIN_HOOD_TRACE(this) - return emplace(keyval); - } - - iterator insert(const_iterator hint, const value_type& keyval) { - (void)hint; - return emplace(keyval).first; - } - - std::pair insert(value_type&& keyval) { - return emplace(std::move(keyval)); - } - - iterator insert(const_iterator hint, value_type&& keyval) { - (void)hint; - return emplace(std::move(keyval)).first; - } - - // Returns 1 if key is found, 0 otherwise. - size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type count(const OtherKey& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - return 1U == count(key); - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type contains(const OtherKey& key) const { - return 1U == count(key); - } - - // Returns a reference to the value found for key. - // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q&>::type at(key_type const& key) { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - // Returns a reference to the value found for key. 
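The overload set above deliberately mirrors std::unordered_map: try_emplace constructs the mapped value only when the key is absent, insert_or_assign overwrites it, and operator[] default-constructs it on first access. A short usage sketch, assuming the robin_hood.h added later in this series is on the include path:

    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> gene_counts;

        gene_counts.try_emplace("reads", 1);        // inserted: key was absent
        gene_counts.try_emplace("reads", 99);       // no effect: key already present
        gene_counts.insert_or_assign("reads", 42);  // overwrites the existing value
        gene_counts["umis"] += 7;                   // default-constructs 0, then adds

        for (const auto& kv : gene_counts) {
            std::printf("%s = %d\n", kv.first.c_str(), kv.second);
        }
    }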
- // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q const&>::type at(key_type const& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type // NOLINT(modernize-use-nodiscard) - find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator find(const key_type& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type find(const OtherKey& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator begin() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - return end(); - } - return iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - const_iterator begin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cbegin(); - } - const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - if (empty()) { - return cend(); - } - return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - - iterator end() { - ROBIN_HOOD_TRACE(this) - // no need to supply valid info pointer: end() must not be dereferenced, and only node - // pointer is compared. - return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - const_iterator end() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cend(); - } - const_iterator cend() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - - iterator erase(const_iterator pos) { - ROBIN_HOOD_TRACE(this) - // its safe to perform const cast here - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); - } - - // Erases element at pos, returns iterator to the next element. - iterator erase(iterator pos) { - ROBIN_HOOD_TRACE(this) - // we assume that pos always points to a valid entry, and not end(). 
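The find overloads taking an OtherKey or is_transparent_tag above enable heterogeneous lookup whenever both the hasher and the key-equal functor expose an is_transparent typedef, so a std::string-keyed map can be probed with a std::string_view without materialising a temporary string. A hedged C++17 sketch; StringHash and StringEq are mine, only hash_bytes and unordered_flat_map come from the header:

    #include <cstdio>
    #include <string>
    #include <string_view>
    #include "robin_hood.h"

    struct StringHash {
        using is_transparent = void;                       // opt in to heterogeneous lookup
        size_t operator()(std::string_view sv) const noexcept {
            return robin_hood::hash_bytes(sv.data(), sv.size());
        }
    };
    struct StringEq {
        using is_transparent = void;
        bool operator()(std::string_view a, std::string_view b) const noexcept { return a == b; }
    };

    int main() {
        robin_hood::unordered_flat_map<std::string, int, StringHash, StringEq> m;
        m["barcode"] = 1;
        std::string_view key = "barcode";                  // no std::string temporary needed
        auto it = m.find(key);
        if (it != m.end()) std::printf("%s -> %d\n", it->first.c_str(), it->second);
    }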
- auto const idx = static_cast(pos.mKeyVals - mKeyVals); - - shiftDown(idx); - --mNumElements; - - if (*pos.mInfo) { - // we've backward shifted, return this again - return pos; - } - - // no backward shift, return next element - return ++pos; - } - - size_t erase(const key_type& key) { - ROBIN_HOOD_TRACE(this) - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - // check while info matches with the source idx - do { - if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - shiftDown(idx); - --mNumElements; - return 1; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found to delete - return 0; - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // exactly the same as reserve(c). - void rehash(size_t c) { - // forces a reserve - reserve(c, true); - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. - void reserve(size_t c) { - // reserve, but don't force rehash - reserve(c, false); - } - - // If possible reallocates the map to a smaller one. This frees the underlying table. - // Does not do anything if load_factor is too large for decreasing the table's size. - void compact() { - ROBIN_HOOD_TRACE(this) - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (newSize < mMask + 1) { - rehashPowerOfTwo(newSize, true); - } - } - - size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return mNumElements; - } - - size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(-1); - } - - ROBIN_HOOD(NODISCARD) bool empty() const noexcept { - ROBIN_HOOD_TRACE(this) - return 0 == mNumElements; - } - - float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return MaxLoadFactor100 / 100.0F; - } - - // Average number of elements per bucket. Since we allow only 1 per bucket - float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(size()) / static_cast(mMask + 1); - } - - ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { - ROBIN_HOOD_TRACE(this) - return mMask; - } - - ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { - if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { - return maxElements * MaxLoadFactor100 / 100; - } - - // we might be a bit inprecise, but since maxElements is quite large that doesn't matter - return (maxElements / 100) * MaxLoadFactor100; - } - - ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { - // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load - // 64bit types. 
- return numElements + sizeof(uint64_t); - } - - ROBIN_HOOD(NODISCARD) - size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { - auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); - return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); - } - - // calculation only allowed for 2^n values - ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { -#if ROBIN_HOOD(BITNESS) == 64 - return numElements * sizeof(Node) + calcNumBytesInfo(numElements); -#else - // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. - auto const ne = static_cast(numElements); - auto const s = static_cast(sizeof(Node)); - auto const infos = static_cast(calcNumBytesInfo(numElements)); - - auto const total64 = ne * s + infos; - auto const total = static_cast(total64); - - if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { - throwOverflowError(); - } - return total; -#endif - } - -private: - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - auto it = find(e.first); - return it != end() && it->second == e.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - return find(e) != end(); - } - - void reserve(size_t c, bool forceRehash) { - ROBIN_HOOD_TRACE(this) - auto const minElementsAllowed = (std::max)(c, mNumElements); - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (forceRehash || newSize > mMask + 1) { - rehashPowerOfTwo(newSize, false); - } - } - - // reserves space for at least the specified number of elements. - // only works if numBuckets if power of two - // True on success, false otherwise - void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { - ROBIN_HOOD_TRACE(this) - - Node* const oldKeyVals = mKeyVals; - uint8_t const* const oldInfo = mInfo; - - const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - // resize operation: move stuff - initData(numBuckets); - if (oldMaxElementsWithBuffer > 1) { - for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { - if (oldInfo[i] != 0) { - // might throw an exception, which is really bad since we are in the middle of - // moving stuff. - insert_move(std::move(oldKeyVals[i])); - // destroy the node but DON'T destroy the data. - oldKeyVals[i].~Node(); - } - } - - // this check is not necessary as it's guarded by the previous if, but it helps - // silence g++'s overeager "attempt to free a non-heap object 'map' - // [-Werror=free-nonheap-object]" warning. 
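calcNumBytesInfo, calcNumElementsWithBuffer and calcNumBytesTotal above size the single allocation that holds both the node array and the info bytes: one info byte per slot, a uint64_t for the sentinel and padding, and an overflow buffer of at most 0xFF extra slots. A worked example of that arithmetic, assuming an 8-byte flat node, 256 slots and the default MaxLoadFactor100 of 80 (both assumptions):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const size_t numSlots         = 256;   // always a power of two
        const size_t maxLoadFactor100 = 80;
        const size_t sizeofNode       = 8;

        const size_t maxElementsAllowed = numSlots * maxLoadFactor100 / 100;               // 204
        const size_t elementsWithBuffer = numSlots + std::min<size_t>(maxElementsAllowed, 0xFF); // 460
        const size_t bytesInfo          = elementsWithBuffer + sizeof(uint64_t);            // sentinel + padding
        const size_t bytesTotal         = elementsWithBuffer * sizeofNode + bytesInfo;      // one malloc for both

        std::printf("info bytes: %zu, total bytes: %zu\n", bytesInfo, bytesTotal);
    }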
- if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - // don't destroy old data: put it into the pool instead - if (forceFree) { - std::free(oldKeyVals); - } else { - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); - } - } - } - } - - ROBIN_HOOD(NOINLINE) void throwOverflowError() const { -#if ROBIN_HOOD(HAS_EXCEPTIONS) - throw std::overflow_error("robin_hood::map overflow"); -#else - abort(); -#endif - } - - template - std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - mKeyVals[idxAndState.first].getSecond() = std::forward(obj); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - void initData(size_t max_elements) { - mNumElements = 0; - mMask = max_elements - 1; - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); - - // malloc & zero mInfo. Faster than calloc everything. - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = reinterpret_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); - - // set sentinel - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; - - // Finds key, and if not already present prepares a spot where to pot the key & value. - // This potentially shifts nodes out of the way, updates mInfo and number of inserted - // elements, so the only operation left to do is create/assign a new node at that spot. 
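insertKeyPrepareEmptySpot (directly below) hands back a slot index plus one of the InsertionState values defined above, and every insert-style entry point (operator[], emplace, try_emplace, insert_or_assign) simply switches on that state. A deliberately tiny toy map showing the caller-side dispatch; its prepare() does plain linear probing and never produces the overwrite or overflow states, which are only commented:

    #include <cstdio>
    #include <utility>
    #include <vector>

    // The four outcomes used by the real insertKeyPrepareEmptySpot().
    enum class InsertionState { overflow_error, key_found, new_node, overwrite_node };

    struct ToyMap {
        std::vector<std::pair<int, int>> slots = std::vector<std::pair<int, int>>(16);
        std::vector<bool> used = std::vector<bool>(16, false);

        std::pair<size_t, InsertionState> prepare(int key) {
            size_t idx = static_cast<size_t>(key) % slots.size();
            while (used[idx] && slots[idx].first != key) idx = (idx + 1) % slots.size();
            return {idx, used[idx] ? InsertionState::key_found : InsertionState::new_node};
        }

        int& operator[](int key) {
            auto idxAndState = prepare(key);
            switch (idxAndState.second) {
            case InsertionState::key_found:
                break;                               // value already there, just return it
            case InsertionState::new_node:
                slots[idxAndState.first] = {key, 0}; // real code placement-news a Node here
                used[idxAndState.first] = true;
                break;
            case InsertionState::overwrite_node:     // real code move-assigns over a shifted-out node
            case InsertionState::overflow_error:     // real code throws / aborts
                break;
            }
            return slots[idxAndState.first].second;
        }
    };

    int main() {
        ToyMap m;
        m[3] = 7;
        m[3] += 1;
        std::printf("m[3] = %d\n", m[3]);
    }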
- template - std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { - for (int i = 0; i < 256; ++i) { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - nextWhileLess(&info, &idx); - - // while we potentially have a match - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do NOT insert. - // see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(idx, InsertionState::key_found); - } - next(&info, &idx); - } - - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - if (!increase_size()) { - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - continue; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - if (idx != insertion_idx) { - shiftUp(idx, insertion_idx); - } - // put at empty spot - mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(insertion_idx, idx == insertion_idx - ? InsertionState::new_node - : InsertionState::overwrite_node); - } - - // enough attempts failed, so finally give up. - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - - bool try_increase_info() { - ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements - << ", maxNumElementsAllowed=" - << calcMaxNumElementsAllowed(mMask + 1)) - if (mInfoInc <= 2) { - // need to be > 2 so that shift works (otherwise undefined behavior!) - return false; - } - // we got space left, try to make info smaller - mInfoInc = static_cast(mInfoInc >> 1U); - - // remove one bit of the hash, leaving more space for the distance info. - // This is extremely fast because we can operate on 8 bytes at once. - ++mInfoHashShift; - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - for (size_t i = 0; i < numElementsWithBuffer; i += 8) { - auto val = unaligned_load(mInfo + i); - val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); - std::memcpy(mInfo + i, &val, sizeof(val)); - } - // update sentinel, which might have been cleared out! - mInfo[numElementsWithBuffer] = 1; - - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - return true; - } - - // True if resize was possible, false otherwise - bool increase_size() { - // nothing allocated yet? just allocate InitialNumElements - if (0 == mMask) { - initData(InitialNumElements); - return true; - } - - auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return true; - } - - ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" - << maxNumElementsAllowed << ", load=" - << (static_cast(mNumElements) * 100.0 / - (static_cast(mMask) + 1))) - - if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - // we have to resize, even though there would still be plenty of space left! - // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case - // we have to rehash a few times - nextHashMultiplier(); - rehashPowerOfTwo(mMask + 1, true); - } else { - // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
- rehashPowerOfTwo((mMask + 1) * 2, false); - } - return true; - } - - void nextHashMultiplier() { - // adding an *even* number, so that the multiplier will always stay odd. This is necessary - // so that the hash stays a mixing function (and thus doesn't have any information loss). - mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); - } - - void destroy() { - if (0 == mMask) { - // don't deallocate! - return; - } - - Destroyer::value>{} - .nodesDoNotDeallocate(*this); - - // This protection against not deleting mMask shouldn't be needed as it's sufficiently - // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object 'fm' - // [-Werror=free-nonheap-object] - if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - } - - void init() noexcept { - mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); - mInfo = reinterpret_cast(&mMask); - mNumElements = 0; - mMask = 0; - mMaxNumElementsAllowed = 0; - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // members are sorted so no padding occurs - uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 - Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 - size_t mNumElements = 0; // 8 byte 32 - size_t mMask = 0; // 8 byte 40 - size_t mMaxNumElementsAllowed = 0; // 8 byte 48 - InfoType mInfoInc = InitialInfoInc; // 4 byte 52 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 - // 16 byte 56 if NodeAllocator -}; - -} // namespace detail - -// map - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_flat_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_node_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_map = - detail::Table) <= sizeof(size_t) * 6 && - std::is_nothrow_move_constructible>::value && - std::is_nothrow_move_assignable>::value, - MaxLoadFactor100, Key, T, Hash, KeyEqual>; - -// set - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_flat_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_node_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_set = detail::Table::value && - std::is_nothrow_move_assignable::value, - MaxLoadFactor100, Key, void, Hash, KeyEqual>; - -} // namespace robin_hood - -#endif From 7d4a558f8b08275950d37e60867bbe61676d9bf2 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:17:11 -0800 Subject: [PATCH 21/49] Revert "undo bitmap and unordered map" This reverts commit e3b6ff4ea2a9f4e69ed4b446e98898ffb4ffe656. 
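The diff that follows changes the equivalence-class machinery in two ways: EC transcript sets are handled as Roaring bitmaps (so intersecting classes becomes a bitmap AND), and the EC lookup table EcMapInv becomes a robin_hood::unordered_flat_map keyed by those bitmaps. A hedged sketch of the two pieces together; RoaringHasherSketch is a stand-in for the RoaringHasher declared in Common.hpp, and the include paths are assumptions:

    #include <cstdint>
    #include <cstdio>
    #include <vector>
    #include "roaring.hh"
    #include "robin_hood.h"

    // Stand-in hasher; Common.hpp declares its own RoaringHasher for the same purpose.
    struct RoaringHasherSketch {
        size_t operator()(const Roaring& r) const {
            std::vector<uint32_t> vals(r.cardinality());
            r.toUint32Array(vals.data());            // same Roaring API the patch itself uses
            size_t h = 0;
            for (uint32_t v : vals) h = h * 31 + v;  // simplistic mixing, illustration only
            return h;
        }
    };

    using EcMapInvSketch = robin_hood::unordered_flat_map<Roaring, int32_t, RoaringHasherSketch>;

    int main() {
        std::vector<uint32_t> ec0{1, 2, 3, 7};       // transcript ids of one equivalence class
        std::vector<uint32_t> ec1{2, 3, 9};          // and of another
        Roaring a(ec0.size(), ec0.data());
        Roaring b(ec1.size(), ec1.data());

        Roaring u = a;
        u &= b;                                      // intersection of the two classes: {2, 3}

        EcMapInvSketch ecmapinv;
        ecmapinv.insert({u, 0});                     // register the intersection as EC 0
        std::printf("intersection size = %llu, found = %d\n",
                    (unsigned long long)u.cardinality(),
                    (int)(ecmapinv.find(u) != ecmapinv.end()));
    }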
--- src/Common.cpp | 84 +- src/Common.hpp | 7 +- src/bustools_count.cpp | 6 +- src/robin_hood.h | 2544 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 2586 insertions(+), 55 deletions(-) create mode 100644 src/robin_hood.h diff --git a/src/Common.cpp b/src/Common.cpp index b4770bd..9a95e54 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -85,59 +85,36 @@ int32_t intersect_ecs(const std::vector &ecs, std::vector &u, if (ecs.size() == 1) { return ecs[0]; // no work } - - u.resize(0); - auto &v = ecmap[ecs[0]]; // copy - for (size_t i = 0; i< v.size(); i++) { - u.push_back(v[i]); - } + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); + u = Roaring(ecmap[ecs[0]].size(), data); + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - const auto &v = ecmap[ecs[i]]; - - int j = 0; - int k = 0; - int l = 0; - int n = u.size(); - int m = v.size(); - // u and v are sorted, j,k,l = 0 - while (j < n && l < m) { - // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m - // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted - if (u[j] < v[l]) { - j++; - } else if (u[j] > v[l]) { - l++; - } else { - // match - if (k < j) { - std::swap(u[k], u[j]); - } - k++; - j++; - i++; - } - } - if (k < n) { - u.resize(k); - } + data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); + u &= Roaring(ecmap[ecs[i]].size(), data); } - if (u.empty()) { + if (u.isEmpty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -215,7 +192,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - std::vector u; // final list of transcripts + Roaring u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -245,11 +222,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.push_back(t); + u.add(t); } } - std::sort(u.begin(), u.end()); - u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -260,9 +235,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; 
+ u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } @@ -291,14 +272,13 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.push_back(t); + u.add(t); } } - if (u.empty()) { + if (u.isEmpty()) { return -1; } - std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -307,9 +287,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..4cc8596 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,12 +9,13 @@ #include #include #include +#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ std::unordered_map +#define u_map_ robin_hood::unordered_flat_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -185,12 +186,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; +typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d125f..e2a63a7 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,7 +32,8 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); + ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -87,8 +88,7 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - std::vector u; - u.reserve(100); + Roaring u; std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { diff --git a/src/robin_hood.h b/src/robin_hood.h new file mode 100644 index 0000000..0af031f --- /dev/null +++ b/src/robin_hood.h @@ -0,0 +1,2544 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ 
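With EcMapInv now keyed on Roaring bitmaps and backed by robin_hood::unordered_flat_map (the u_map_ macro above), equivalence-class lookup becomes a hash of the transcript set itself. The sketch below shows one way such a map can be built, mirroring how bustools_count populates ecmapinv. RoaringBitmapHash is a stand-in for the RoaringHasher declared in Common.hpp (its body is not reproduced in this hunk), and build_inverse is an illustrative helper, not code from the patch.

#include <cstddef>
#include <cstdint>
#include <vector>
#include "roaring.hh"
#include "robin_hood.h"

// Stand-in for the patch's RoaringHasher: any value-based hash works as long
// as equal bitmaps hash equally (Roaring::operator== supplies key equality).
// The real hasher presumably mixes the set bits without the temporary copy.
struct RoaringBitmapHash {
  std::size_t operator()(const Roaring &r) const {
    std::vector<uint32_t> vals(r.cardinality());
    r.toUint32Array(vals.data());
    std::uint64_t h = vals.size();
    for (uint32_t x : vals) {
      h ^= x + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);  // boost-style combine
    }
    return static_cast<std::size_t>(h);
  }
};

using EcMapInvSketch =
    robin_hood::unordered_flat_map<Roaring, int32_t, RoaringBitmapHash>;

// Populate the inverse map from the parsed ec file, as bustools_count now does.
static EcMapInvSketch build_inverse(const std::vector<std::vector<int32_t>> &ecmap) {
  EcMapInvSketch inv;
  inv.reserve(ecmap.size());
  for (int32_t ec = 0; ec < static_cast<int32_t>(ecmap.size()); ++ec) {
    uint32_t *data =
        reinterpret_cast<uint32_t *>(const_cast<int32_t *>(ecmap[ec].data()));
    inv.insert({Roaring(ecmap[ec].size(), data), ec});
  }
  return inv;
}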
___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2021 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include // only to support hash of smart pointers +#include +#include +#include +#include +#if __cplusplus >= 201703L +# include +#endif + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// #define ROBIN_HOOD_TRACE_ENABLED +#ifdef ROBIN_HOOD_TRACE_ENABLED +# include +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_TRACE(x) +#endif + +// #define ROBIN_HOOD_COUNT_ENABLED +#ifdef ROBIN_HOOD_COUNT_ENABLED +# include +# define ROBIN_HOOD_COUNT(x) ++counts().x; +namespace robin_hood { +struct Counts { + uint64_t shiftUp{}; + uint64_t shiftDown{}; +}; +inline std::ostream& operator<<(std::ostream& os, Counts const& c) { + return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; +} + +static Counts& counts() { + static Counts counts{}; + return counts; +} +} // namespace robin_hood +#else +# define ROBIN_HOOD_COUNT(x) +#endif + +// all non-argument macros should use this facility. 
See +// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ +#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#endif + +// inline +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) +#endif + +// exceptions +#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 +#endif + +// count leading/trailing bits +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) +# else +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) +# endif +#endif + +// fallthrough +#ifndef __has_cpp_attribute // For backwards compatibility +# define __has_cpp_attribute(x) 0 +#endif +#if __has_cpp_attribute(clang::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() +#endif + +// likely/unlikely +#ifdef _MSC_VER +# define ROBIN_HOOD_LIKELY(condition) condition +# define ROBIN_HOOD_UNLIKELY(condition) condition +#else +# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) +# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) +#endif + +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + +// workaround missing "is_trivially_copyable" in g++ < 5.0 +// See https://stackoverflow.com/a/31798726/48181 +#if defined(__GNUC__) && __GNUC__ < 5 +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() +#endif + +namespace robin_hood { + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) +# define ROBIN_HOOD_STD std +#else + +// c++11 compatibility layer +namespace ROBIN_HOOD_STD { +template +struct alignment_of + : std::integral_constant::type)> {}; + +template +class integer_sequence { +public: + using value_type = T; + static_assert(std::is_integral::value, "not integral type"); + static constexpr std::size_t size() noexcept { + return sizeof...(Ints); + } +}; +template +using index_sequence = integer_sequence; + +namespace detail_ { +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); + + template + struct IntSeqCombiner; + + template + struct IntSeqCombiner, integer_sequence> { + using TResult = integer_sequence; + }; + + using TResult = + typename IntSeqCombiner::TResult, + typename IntSeqImpl::TResult>::TResult; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; +} // namespace detail_ + +template +using make_integer_sequence = typename detail_::IntSeqImpl::TResult; + +template +using make_index_sequence = make_integer_sequence; + +template +using index_sequence_for = make_index_sequence; + +} // namespace ROBIN_HOOD_STD + +#endif + +namespace detail { + +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; +#else +using SizeT = uint32_t; +#endif + +template +T rotr(T x, unsigned k) { + return (x >> k) | (x << (8U * sizeof(T) - k)); +} + +// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to +// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with +// care! 
+template +inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { + return reinterpret_cast(ptr); +} + +template +inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { + return reinterpret_cast(ptr); +} + +// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other +// inlinings more difficult. Throws are also generally the slow path. +template +[[noreturn]] ROBIN_HOOD(NOINLINE) +#if ROBIN_HOOD(HAS_EXCEPTIONS) + void doThrow(Args&&... args) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) + throw E(std::forward(args)...); +} +#else + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + abort(); +} +#endif + +template +T* assertNotNull(T* t, Args&&... args) { + if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { + doThrow(std::forward(args)...); + } + return t; +} + +template +inline T unaligned_load(void const* ptr) noexcept { + // using memcpy so we don't get into unaligned load problems. + // compiler should optimize this very well anyways. + T t; + std::memcpy(&t, ptr, sizeof(T)); + return t; +} + +// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, +// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a +// pointer. +template +class BulkPoolAllocator { +public: + BulkPoolAllocator() noexcept = default; + + // does not copy anything, just creates a new allocator. + BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept + : mHead(nullptr) + , mListForFree(nullptr) {} + + BulkPoolAllocator(BulkPoolAllocator&& o) noexcept + : mHead(o.mHead) + , mListForFree(o.mListForFree) { + o.mListForFree = nullptr; + o.mHead = nullptr; + } + + BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { + reset(); + mHead = o.mHead; + mListForFree = o.mListForFree; + o.mListForFree = nullptr; + o.mHead = nullptr; + return *this; + } + + BulkPoolAllocator& + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { + // does not do anything + return *this; + } + + ~BulkPoolAllocator() noexcept { + reset(); + } + + // Deallocates all allocated memory. + void reset() noexcept { + while (mListForFree) { + T* tmp = *mListForFree; + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); + mListForFree = reinterpret_cast_no_cast_align_warning(tmp); + } + mHead = nullptr; + } + + // allocates, but does NOT initialize. Use in-place new constructor, e.g. + // T* obj = pool.allocate(); + // ::new (static_cast(obj)) T(); + T* allocate() { + T* tmp = mHead; + if (!tmp) { + tmp = performAllocation(); + } + + mHead = *reinterpret_cast_no_cast_align_warning(tmp); + return tmp; + } + + // does not actually deallocate but puts it in store. + // make sure you have already called the destructor! e.g. with + // obj->~T(); + // pool.deallocate(obj); + void deallocate(T* obj) noexcept { + *reinterpret_cast_no_cast_align_warning(obj) = mHead; + mHead = obj; + } + + // Adds an already allocated block of memory to the allocator. This allocator is from now on + // responsible for freeing the data (with free()). If the provided data is not large enough to + // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
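unaligned_load above avoids casting a byte pointer to T* and instead memcpy-s into a local, which compilers lower to an ordinary load where alignment allows; hash_bytes later relies on the same trick to read 64-bit words out of arbitrary buffers. A tiny sketch of the idiom in isolation (the buffer contents are made up):

#include <cstdint>
#include <cstdio>
#include <cstring>

// memcpy-based unaligned load, as in robin_hood's unaligned_load(): copy into
// a local instead of dereferencing a misaligned pointer, so there is no
// undefined behaviour on alignment-strict platforms.
template <typename T>
T load_unaligned(const void *p) noexcept {
  T out;
  std::memcpy(&out, p, sizeof(T));  // compilers turn this into a single load
  return out;
}

int main() {
  unsigned char buf[12] = {0};
  buf[3] = 0xEF; buf[4] = 0xBE; buf[5] = 0xAD; buf[6] = 0xDE;
  // Reading a uint32_t starting at offset 3 is misaligned; memcpy makes it safe.
  std::uint32_t v = load_unaligned<std::uint32_t>(buf + 3);
  std::printf("0x%08X\n", v);  // 0xDEADBEEF on little-endian machines
  return 0;
}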
+ void addOrFree(void* ptr, const size_t numBytes) noexcept { + // calculate number of available elements in ptr + if (numBytes < ALIGNMENT + ALIGNED_SIZE) { + // not enough data for at least one element. Free and return. + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } else { + ROBIN_HOOD_LOG("add to buffer") + add(ptr, numBytes); + } + } + + void swap(BulkPoolAllocator& other) noexcept { + using std::swap; + swap(mHead, other.mHead); + swap(mListForFree, other.mListForFree); + } + +private: + // iterates the list of allocated memory to calculate how many to alloc next. + // Recalculating this each time saves us a size_t member. + // This ignores the fact that memory blocks might have been added manually with addOrFree. In + // practice, this should not matter much. + ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { + auto tmp = mListForFree; + size_t numAllocs = MinNumAllocs; + + while (numAllocs * 2 <= MaxNumAllocs && tmp) { + auto x = reinterpret_cast(tmp); + tmp = *x; + numAllocs *= 2; + } + + return numAllocs; + } + + // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). + void add(void* ptr, const size_t numBytes) noexcept { + const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; + + auto data = reinterpret_cast(ptr); + + // link free list + auto x = reinterpret_cast(data); + *x = mListForFree; + mListForFree = data; + + // create linked list for newly allocated data + auto* const headT = + reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); + + auto* const head = reinterpret_cast(headT); + + // Visual Studio compiler automatically unrolls this loop, which is pretty cool + for (size_t i = 0; i < numElements; ++i) { + *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = + head + (i + 1) * ALIGNED_SIZE; + } + + // last one points to 0 + *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = + mHead; + mHead = headT; + } + + // Called when no memory is available (mHead == 0). + // Don't inline this slow path. + ROBIN_HOOD(NOINLINE) T* performAllocation() { + size_t const numElementsToAlloc = calcNumElementsToAlloc(); + + // alloc new memory: [prev |T, T, ... T] + size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); + return mHead; + } + + // enforce byte alignment of the T's +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) + static constexpr size_t ALIGNMENT = + (std::max)(std::alignment_of::value, std::alignment_of::value); +#else + static const size_t ALIGNMENT = + (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) + ? ROBIN_HOOD_STD::alignment_of::value + : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround +#endif + + static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; + + static_assert(MinNumAllocs >= 1, "MinNumAllocs"); + static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); + static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); + static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); + static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); + + T* mHead{nullptr}; + T** mListForFree{nullptr}; +}; + +template +struct NodeAllocator; + +// dummy allocator that does nothing +template +struct NodeAllocator { + + // we are not using the data, so just free it. 
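BulkPoolAllocator's own comments spell out its contract: allocate() hands back raw storage that the caller must placement-new into, and the destructor has to be invoked by hand before the slot is returned with deallocate(). The sketch below only exercises that protocol. BulkPoolAllocator is an internal robin_hood detail, and the Widget type and the 4/256 pool bounds are illustrative assumptions rather than anything the patch defines.

#include <string>
#include "robin_hood.h"

struct Widget {
  int id;
  std::string name;
};

int main() {
  // Explicit Min/Max allocation counts, satisfying the static_asserts above.
  robin_hood::detail::BulkPoolAllocator<Widget, 4, 256> pool;

  Widget *w = pool.allocate();                     // raw, uninitialized slot
  ::new (static_cast<void *>(w)) Widget{42, "ec"}; // construct in place

  // ... use *w ...

  w->~Widget();        // destroy manually, as the header's comment requires
  pool.deallocate(w);  // the slot goes back onto the pool's free list
  return 0;            // ~BulkPoolAllocator releases the bulk blocks
}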
+ void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making +// my own here. +namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) +using std::swap; +template +struct nothrow { + static const bool value = noexcept(swap(std::declval(), std::declval())); +}; +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif +} // namespace swappable + +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = T1; + using second_type = T2; + + template ::value && + std::is_default_constructible::value>::type> + constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) + : first() + , second() {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair const& o) noexcept( + noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) + : first(o.first) + , second(o.second) {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(o.first)) + , second(std::move(o.second)) {} + + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(a)) + , second(std::move(b)) {} + + template + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) + : first(std::forward(a)) + , second(std::forward(b)) {} + + template + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) + : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()) { + } + + // constructor called from the std::piecewise_construct_t ctor + template + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) + : first(std::forward(std::get(a))...) + , second(std::forward(std::get(b))...) { + // make visual studio compiler happy about warning about unused a & b. + // Visual studio's pair implementation disables warning 4100. 
+ (void)a; + (void)b; + } + + void swap(pair& o) noexcept((detail::swappable::nothrow::value) && + (detail::swappable::nothrow::value)) { + using std::swap; + swap(first, o.first); + swap(second, o.second); + } + + T1 first; // NOLINT(misc-non-private-member-variables-in-classes) + T2 second; // NOLINT(misc-non-private-member-variables-in-classes) +}; + +template +inline void swap(pair& a, pair& b) noexcept( + noexcept(std::declval&>().swap(std::declval&>()))) { + a.swap(b); +} + +template +inline constexpr bool operator==(pair const& x, pair const& y) { + return (x.first == y.first) && (x.second == y.second); +} +template +inline constexpr bool operator!=(pair const& x, pair const& y) { + return !(x == y); +} +template +inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( + std::declval() < std::declval()) && noexcept(std::declval() < + std::declval())) { + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +inline constexpr bool operator>(pair const& x, pair const& y) { + return y < x; +} +template +inline constexpr bool operator<=(pair const& x, pair const& y) { + return !(x > y); +} +template +inline constexpr bool operator>=(pair const& x, pair const& y) { + return !(x < y); +} + +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { + static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + static constexpr uint64_t seed = UINT64_C(0xe17a1465); + static constexpr unsigned int r = 47; + + auto const* const data64 = static_cast(ptr); + uint64_t h = seed ^ (len * m); + + size_t const n_blocks = len / 8; + for (size_t i = 0; i < n_blocks; ++i) { + auto k = detail::unaligned_load(data64 + i); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + auto const* const data8 = reinterpret_cast(data64 + n_blocks); + switch (len & 7U) { + case 7: + h ^= static_cast(data8[6]) << 48U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 6: + h ^= static_cast(data8[5]) << 40U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 5: + h ^= static_cast(data8[4]) << 32U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 4: + h ^= static_cast(data8[3]) << 24U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 3: + h ^= static_cast(data8[2]) << 16U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 2: + h ^= static_cast(data8[1]) << 8U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 1: + h ^= static_cast(data8[0]); + h *= m; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + default: + break; + } + + h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; + return static_cast(h); +} + +inline size_t hash_int(uint64_t x) noexcept { + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); +} + +// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
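hash_bytes is the murmur-style byte hasher and hash_int the 64-bit mixer that the rest of the header builds on; robin_hood::hash<std::string>, for example, is just hash_bytes over the character data. The same primitives can be reused for project-specific keys. The BarcodeUmi type and its hasher below are purely illustrative, not something the patch defines.

#include <cstddef>
#include <cstdint>
#include "robin_hood.h"

// Illustrative key type: a barcode/UMI pair packed as two 64-bit words.
struct BarcodeUmi {
  std::uint64_t barcode;
  std::uint64_t umi;
  bool operator==(const BarcodeUmi &o) const noexcept {
    return barcode == o.barcode && umi == o.umi;
  }
};

// Reuse the header's primitives: hash_int is the same mixer robin_hood applies
// to its integral specializations (hash_bytes over the padding-free struct
// would work as well).
struct BarcodeUmiHash {
  std::size_t operator()(const BarcodeUmi &k) const noexcept {
    return robin_hood::hash_int(k.barcode) ^
           (robin_hood::hash_int(k.umi) * 0x9e3779b97f4a7c15ULL);
  }
};

int main() {
  robin_hood::unordered_flat_map<BarcodeUmi, std::uint32_t, BarcodeUmiHash> umi_counts;
  ++umi_counts[BarcodeUmi{17, 42}];  // mapped value starts value-initialized at 0
  ++umi_counts[BarcodeUmi{17, 42}];
  return umi_counts[BarcodeUmi{17, 42}] == 2 ? 0 : 1;
}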
+template +struct hash : public std::hash { + size_t operator()(T const& obj) const + noexcept(noexcept(std::declval>().operator()(std::declval()))) { + // call base hash + auto result = std::hash::operator()(obj); + // return mixed of that, to be save against identity has + return hash_int(static_cast(result)); + } +}; + +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); + } +}; + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + +template +struct hash { + size_t operator()(T* ptr) const noexcept { + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); + } +}; + +#define ROBIN_HOOD_HASH_INT(T) \ + template <> \ + struct hash { \ + size_t operator()(T const& obj) const noexcept { \ + return hash_int(static_cast(obj)); \ + } \ + } + +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif +// see https://en.cppreference.com/w/cpp/utility/hash +ROBIN_HOOD_HASH_INT(bool); +ROBIN_HOOD_HASH_INT(char); +ROBIN_HOOD_HASH_INT(signed char); +ROBIN_HOOD_HASH_INT(unsigned char); +ROBIN_HOOD_HASH_INT(char16_t); +ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) +ROBIN_HOOD_HASH_INT(wchar_t); +#endif +ROBIN_HOOD_HASH_INT(short); +ROBIN_HOOD_HASH_INT(unsigned short); +ROBIN_HOOD_HASH_INT(int); +ROBIN_HOOD_HASH_INT(unsigned int); +ROBIN_HOOD_HASH_INT(long); +ROBIN_HOOD_HASH_INT(long long); +ROBIN_HOOD_HASH_INT(unsigned long); +ROBIN_HOOD_HASH_INT(unsigned long long); +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif +namespace detail { + +template +struct void_type { + using type = void; +}; + +template +struct has_is_transparent : public std::false_type {}; + +template +struct has_is_transparent::type> + : public std::true_type {}; + +// using wrapper classes for hash and key_equal prevents the diamond problem when the same type +// is used. see https://stackoverflow.com/a/28771920/48181 +template +struct WrapHash : public T { + WrapHash() = default; + explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +template +struct WrapKeyEqual : public T { + WrapKeyEqual() = default; + explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +// A highly optimized hashmap implementation, using the Robin Hood algorithm. +// +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but +// be about 2x faster in most cases and require much less allocations. +// +// This implementation uses the following memory layout: +// +// [Node, Node, ... Node | info, info, ... infoSentinel ] +// +// * Node: either a DataNode that directly has the std::pair as member, +// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class Table + : public WrapHash, + public WrapKeyEqual, + detail::NodeAllocator< + typename std::conditional< + std::is_void::value, Key, + robin_hood::pair::type, T>>::type, + 4, 16384, IsFlat> { +public: + static constexpr bool is_flat = IsFlat; + static constexpr bool is_map = !std::is_void::value; + static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; + + using key_type = Key; + using mapped_type = T; + using value_type = typename std::conditional< + is_set, Key, + robin_hood::pair::type, T>>::type; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = Table; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + using WHash = WrapHash; + using WKeyEqual = WrapKeyEqual; + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr uint32_t InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = uint32_t; + + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode final { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( + noexcept(value_type(std::forward(args)...))) + : mData(std::forward(args)...) 
{} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( + std::is_nothrow_move_constructible::value) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} + void destroyDoNotDeallocate() noexcept {} + + value_type const* operator->() const noexcept { + return &mData; + } + value_type* operator->() noexcept { + return &mData; + } + + const value_type& operator*() const noexcept { + return mData; + } + + value_type& operator*() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData.second; + } + + void swap(DataNode& o) noexcept( + noexcept(std::declval().swap(std::declval()))) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept + : mData(std::move(n.mData)) {} + + void destroy(M& map) noexcept { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() noexcept { + mData->~value_type(); + } + + value_type const* operator->() const noexcept { + return mData; + } + + value_type* operator->() noexcept { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData->second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData->second; + } + + void swap(DataNode& o) noexcept { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) + ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { + return n.getFirst(); + } + + // in case we have void mapped_type, we are not using a pair, thus we just route k through. + // No need to disable this because it's just not used if not applicable. 
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { + return k; + } + + // in case we have non-void mapped_type, we have a standard robin_hood::pair + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, key_type const&>::type + getFirstConst(value_type const& vt) const noexcept { + return vt.first; + } + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); + auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); + std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& s, M& t) const { + auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); + std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); + + for (size_t i = 0; i < numElementsWithBuffer; ++i) { + if (t.mInfo[i]) { + ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + struct fast_forward_tag {}; + + // generic iterator for both const_iterator and iterator. + template + // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() = default; + + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. + + // Conversion constructor from iterator to const_iterator. 
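The Cloner specializations above choose at compile time between one raw std::copy of the whole node/info buffer (when everything is trivially copyable) and per-slot copy construction guided by the info bytes. The standalone sketch below illustrates the same dispatch with C++17 if constexpr instead of the header's template specializations; it is a simplification, assigning into already-constructed elements rather than placement-new-ing into raw storage.

#include <cstddef>
#include <cstring>
#include <string>
#include <type_traits>
#include <vector>

template <typename T>
void clone_buffer(const T *src, T *dst, std::size_t n) {
  if constexpr (std::is_trivially_copyable_v<T>) {
    std::memcpy(dst, src, n * sizeof(T));  // fast path, like Cloner<M, true>
  } else {
    for (std::size_t i = 0; i < n; ++i) {  // slow path: per-element copy
      dst[i] = src[i];
    }
  }
}

int main() {
  int a[4] = {1, 2, 3, 4};
  int b[4];
  clone_buffer(a, b, 4);  // memcpy path

  std::vector<std::string> s = {"AAAC", "CTGA"};
  std::vector<std::string> t(2);
  clone_buffer(s.data(), t.data(), 2);  // element-wise path
  return (b[3] == 4 && t[1] == "CTGA") ? 0 : 1;
}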
+ template ::type> + // NOLINTNEXTLINE(hicpp-explicit-conversions) + Iter(Iter const& other) noexcept + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr, + fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) { + fastForward(); + } + + template ::type> + Iter& operator=(Iter const& other) noexcept { + mKeyVals = other.mKeyVals; + mInfo = other.mInfo; + return *this; + } + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() noexcept { + mInfo++; + mKeyVals++; + fastForward(); + return *this; + } + + Iter operator++(int) noexcept { + Iter tmp = *this; + ++(*this); + return tmp; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const noexcept { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const noexcept { + return mKeyVals != o.mKeyVals; + } + + private: + // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. + void fastForward() noexcept { + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } +#else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; +#endif + } + + friend class Table; + NodePtr mKeyVals{nullptr}; + uint8_t const* mInfo{nullptr}; + }; + + //////////////////////////////////////////////////////////////////// + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const noexcept { + *idx = *idx + 1; + *info += mInfoInc; + } + + void nextWhileLess(InfoType* info, size_t* idx) const noexcept { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. 
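keyToIdx above stretches one hash value to do two jobs: after an extra multiply-and-xorshift to guard against weak user hashes, the low InitialInfoNumBits seed the per-slot info byte and the remaining bits, masked by mMask, pick the bucket. The toy function below mirrors that split with the initial constants shown earlier (5 info bits, mInfoHashShift of 0); the multiplier is a stand-in for mHashMultiplier, and the whole thing is illustrative rather than the real member function.

#include <cstdint>
#include <cstdio>

struct IdxInfo {
  std::size_t idx;     // bucket index
  std::uint32_t info;  // seed for the probe-distance/info byte
};

IdxInfo key_to_idx_sketch(std::uint64_t h, std::size_t mask /* 2^n - 1 buckets */) {
  constexpr std::uint32_t kInitialInfoNumBits = 5;
  constexpr std::uint32_t kInitialInfoInc = 1u << kInitialInfoNumBits;  // 32
  constexpr std::uint64_t kInfoMask = kInitialInfoInc - 1u;             // 0x1f

  // Extra mixing, as keyToIdx() does, to defend against badly mixed user hashes.
  h *= 0xc4ceb9fe1a85ec53ULL;  // stand-in for mHashMultiplier
  h ^= h >> 33U;

  IdxInfo r;
  r.info = kInitialInfoInc + static_cast<std::uint32_t>(h & kInfoMask);
  r.idx = static_cast<std::size_t>(h >> kInitialInfoNumBits) & mask;
  return r;
}

int main() {
  IdxInfo r = key_to_idx_sketch(0x0123456789abcdefULL, 255);  // 256 buckets
  std::printf("idx=%zu info=%u\n", r.idx, r.info);
  return 0;
}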
+ void + shiftUp(size_t startIdx, + size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { + auto idx = startIdx; + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); + while (--idx != insertion_idx) { + mKeyVals[idx] = std::move(mKeyVals[idx - 1]); + } + + idx = startIdx; + while (idx != insertion_idx) { + ROBIN_HOOD_COUNT(shiftUp) + mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); + if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + --idx; + } + } + + void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { + // until we find one that is either empty or has zero offset. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + while (mInfo[idx + 1] >= 2 * mInfoInc) { + ROBIN_HOOD_COUNT(shiftDown) + mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[idx + 1]); + ++idx; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + ROBIN_HOOD(NODISCARD) + size_t findIdx(Other const& key) const { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask == 0 ? 0 + : static_cast(std::distance( + mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); + } + + void cloneData(const Table& o) { + Cloner()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx{}; + InfoType info{}; + keyToIdx(keyval.getFirst(), &idx, &info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = idx + 1; + info += mInfoInc; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + } + + template + Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(first, last); + } + + Table(std::initializer_list initlist, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(initlist.begin(), initlist.end()); + } + + Table(Table&& o) noexcept + : WHash(std::move(static_cast(o))) + , WKeyEqual(std::move(static_cast(o))) + , DataPool(std::move(static_cast(o))) { + ROBIN_HOOD_TRACE(this) + if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + // set other's mask to 0 so its destructor won't do anything + o.init(); + } + } + + Table& operator=(Table&& o) noexcept { + ROBIN_HOOD_TRACE(this) + if (&o != this) { + if (o.mMask) { + // only move stuff if the other map actually has some data + destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + WHash::operator=(std::move(static_cast(o))); + WKeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + + o.init(); + + } else { + // nothing in the other map => just clear us. + clear(); + } + } + return *this; + } + + Table(const Table& o) + : WHash(static_cast(o)) + , WKeyEqual(static_cast(o)) + , DataPool(static_cast(o)) { + ROBIN_HOOD_TRACE(this) + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. 
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + Table& operator=(Table const& o) { + ROBIN_HOOD_TRACE(this) + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + init(); + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + // sentinel is set in cloneData + } + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(Table& o) { + ROBIN_HOOD_TRACE(this) + using std::swap; + swap(o, *this); + } + + // Clears all data, without resizing. + void clear() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + // clear everything, then set the sentinel again + uint8_t const z = 0; + std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~Table() { + ROBIN_HOOD_TRACE(this) + destroy(); + } + + // Checks if both tables contain the same entries. Order is irrelevant. 
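The class comment earlier bills the table as a drop-in replacement for std::unordered_map, and the members in this region (copy/move assignment, swap, clear, operator==) are what let ordinary map code carry over unchanged. A short usage sketch follows, spelled with the underlying type rather than the u_map_ macro from Common.hpp; the gene IDs are made-up example data.

#include <cstdint>
#include <iostream>
#include <string>
#include "robin_hood.h"

int main() {
  // In the patch this is written u_map_<std::string, int32_t> via Common.hpp.
  robin_hood::unordered_flat_map<std::string, int32_t> genenames;
  genenames.reserve(3);

  genenames.insert({"ENSG00000139618", 0});
  genenames.emplace("ENSG00000157764", 1);
  genenames["ENSG00000141510"] = 2;  // operator[] default-constructs the value

  auto it = genenames.find("ENSG00000157764");
  if (it != genenames.end()) {
    std::cout << it->first << " -> " << it->second << "\n";
  }

  auto copy = genenames;  // deep copy; operator== is order-insensitive
  std::cout << "equal: " << (copy == genenames) << ", size: " << copy.size() << "\n";

  genenames.erase("ENSG00000139618");
  genenames.clear();  // drops the elements, keeps the allocation
  return 0;
}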
+ bool operator==(const Table& other) const { + ROBIN_HOOD_TRACE(this) + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + if (!has(otherEntry)) { + return false; + } + } + + return true; + } + + bool operator!=(const Table& other) const { + ROBIN_HOOD_TRACE(this) + return !operator==(other); + } + + template + typename std::enable_if::value, Q&>::type operator[](const key_type& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + typename std::enable_if::value, Q&>::type operator[](key_type&& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + + template + std::pair emplace(Args&&... args) { + ROBIN_HOOD_TRACE(this) + Node n{*this, std::forward(args)...}; + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; + } + + std::pair insert(const value_type& keyval) { + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; + } + + std::pair insert(value_type&& keyval) { + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + return 1U == count(key); + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q&>::type at(key_type const& key) { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + // Returns a reference to the value found for key. 
+ // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q const&>::type at(key_type const& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + return end(); + } + return iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + const_iterator begin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cbegin(); + } + const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + if (empty()) { + return cend(); + } + return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + + iterator end() { + ROBIN_HOOD_TRACE(this) + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + const_iterator end() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cend(); + } + const_iterator cend() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + ROBIN_HOOD_TRACE(this) + // its safe to perform const cast here + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + ROBIN_HOOD_TRACE(this) + // we assume that pos always points to a valid entry, and not end(). 
+ auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // exactly the same as reserve(c). + void rehash(size_t c) { + // forces a reserve + reserve(c, true); + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. + void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } + } + + size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return mNumElements; + } + + size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(-1); + } + + ROBIN_HOOD(NODISCARD) bool empty() const noexcept { + ROBIN_HOOD_TRACE(this) + return 0 == mNumElements; + } + + float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return MaxLoadFactor100 / 100.0F; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(size()) / static_cast(mMask + 1); + } + + ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { + ROBIN_HOOD_TRACE(this) + return mMask; + } + + ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { + if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { + return maxElements * MaxLoadFactor100 / 100; + } + + // we might be a bit inprecise, but since maxElements is quite large that doesn't matter + return (maxElements / 100) * MaxLoadFactor100; + } + + ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { + // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load + // 64bit types. 
+ return numElements + sizeof(uint64_t); + } + + ROBIN_HOOD(NODISCARD) + size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { + auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); + return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); + } + + // calculation only allowed for 2^n values + ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { +#if ROBIN_HOOD(BITNESS) == 64 + return numElements * sizeof(Node) + calcNumBytesInfo(numElements); +#else + // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. + auto const ne = static_cast(numElements); + auto const s = static_cast(sizeof(Node)); + auto const infos = static_cast(calcNumBytesInfo(numElements)); + + auto const total64 = ne * s + infos; + auto const total = static_cast(total64); + + if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { + throwOverflowError(); + } + return total; +#endif + } + +private: + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + auto it = find(e.first); + return it != end() && it->second == e.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + return find(e) != end(); + } + + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + + // reserves space for at least the specified number of elements. + // only works if numBuckets if power of two + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) + + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + // resize operation: move stuff + initData(numBuckets); + if (oldMaxElementsWithBuffer > 1) { + for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { + if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. 
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } + } + } + + ROBIN_HOOD(NOINLINE) void throwOverflowError() const { +#if ROBIN_HOOD(HAS_EXCEPTIONS) + throw std::overflow_error("robin_hood::map overflow"); +#else + abort(); +#endif + } + + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { + mNumElements = 0; + mMask = max_elements - 1; + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); + + // malloc & zero mInfo. Faster than calloc everything. + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); + + // set sentinel + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; + + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. 
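The try_emplace_impl and insertOrAssignImpl routines above back the public operator[], try_emplace, and insert_or_assign overloads. A short sketch of the observable differences at the call site (usage only, assuming the flat map alias declared at the end of the header):

    #include <cassert>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> m;

        m["x"] = 1;                            // operator[]: default-construct, then assign

        auto r1 = m.try_emplace("x", 99);      // key present: value left untouched
        assert(!r1.second && m["x"] == 1);

        auto r2 = m.insert_or_assign("x", 99); // key present: value overwritten
        assert(!r2.second && m["x"] == 99);

        auto r3 = m.try_emplace("y", 7);       // key absent: inserted
        assert(r3.second && m["y"] == 7);
        return 0;
    }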
+ template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + nextWhileLess(&info, &idx); + + // while we potentially have a match + while (info == mInfo[idx]) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + // key already exists, do NOT insert. + // see http://en.cppreference.com/w/cpp/container/unordered_map/insert + return std::make_pair(idx, InsertionState::key_found); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + continue; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = info; + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + if (idx != insertion_idx) { + shiftUp(idx, insertion_idx); + } + // put at empty spot + mInfo[insertion_idx] = static_cast(insertion_info); + ++mNumElements; + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); + } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + + bool try_increase_info() { + ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements + << ", maxNumElementsAllowed=" + << calcMaxNumElementsAllowed(mMask + 1)) + if (mInfoInc <= 2) { + // need to be > 2 so that shift works (otherwise undefined behavior!) + return false; + } + // we got space left, try to make info smaller + mInfoInc = static_cast(mInfoInc >> 1U); + + // remove one bit of the hash, leaving more space for the distance info. + // This is extremely fast because we can operate on 8 bytes at once. + ++mInfoHashShift; + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + for (size_t i = 0; i < numElementsWithBuffer; i += 8) { + auto val = unaligned_load(mInfo + i); + val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); + std::memcpy(mInfo + i, &val, sizeof(val)); + } + // update sentinel, which might have been cleared out! + mInfo[numElementsWithBuffer] = 1; + + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + return true; + } + + // True if resize was possible, false otherwise + bool increase_size() { + // nothing allocated yet? just allocate InitialNumElements + if (0 == mMask) { + initData(InitialNumElements); + return true; + } + + auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + if (mNumElements < maxNumElementsAllowed && try_increase_info()) { + return true; + } + + ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" + << maxNumElementsAllowed << ", load=" + << (static_cast(mNumElements) * 100.0 / + (static_cast(mMask) + 1))) + + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case + // we have to rehash a few times + nextHashMultiplier(); + rehashPowerOfTwo(mMask + 1, true); + } else { + // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
+ rehashPowerOfTwo((mMask + 1) * 2, false); + } + return true; + } + + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); + } + + void destroy() { + if (0 == mMask) { + // don't deallocate! + return; + } + + Destroyer::value>{} + .nodesDoNotDeallocate(*this); + + // This protection against not deleting mMask shouldn't be needed as it's sufficiently + // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise + // reports a compile error: attempt to free a non-heap object 'fm' + // [-Werror=free-nonheap-object] + if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + } + + void init() noexcept { + mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); + mInfo = reinterpret_cast(&mMask); + mNumElements = 0; + mMask = 0; + mMaxNumElementsAllowed = 0; + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // members are sorted so no padding occurs + uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 + // 16 byte 56 if NodeAllocator +}; + +} // namespace detail + +// map + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_flat_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_node_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_map = + detail::Table) <= sizeof(size_t) * 6 && + std::is_nothrow_move_constructible>::value && + std::is_nothrow_move_assignable>::value, + MaxLoadFactor100, Key, T, Hash, KeyEqual>; + +// set + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_flat_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_node_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_set = detail::Table::value && + std::is_nothrow_move_assignable::value, + MaxLoadFactor100, Key, void, Hash, KeyEqual>; + +} // namespace robin_hood + +#endif From 82959c039487deed0e49a6568d37f2b1d3d835b7 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:31:03 -0800 Subject: [PATCH 22/49] back to unordered map --- src/Common.hpp | 3 +- src/robin_hood.h | 2544 ---------------------------------------------- 2 files changed, 1 insertion(+), 2546 deletions(-) delete mode 100644 src/robin_hood.h diff --git a/src/Common.hpp b/src/Common.hpp index 4cc8596..4b290a3 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,13 +9,12 @@ #include #include #include -#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ robin_hood::unordered_flat_map +#define u_map_ std::unordered_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, diff --git a/src/robin_hood.h 
b/src/robin_hood.h deleted file mode 100644 index 0af031f..0000000 --- a/src/robin_hood.h +++ /dev/null @@ -1,2544 +0,0 @@ -// ______ _____ ______ _________ -// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / -// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / -// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / -// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ -// _/_____/ -// -// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// https://github.com/martinus/robin-hood-hashing -// -// Licensed under the MIT License . -// SPDX-License-Identifier: MIT -// Copyright (c) 2018-2021 Martin Ankerl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#ifndef ROBIN_HOOD_H_INCLUDED -#define ROBIN_HOOD_H_INCLUDED - -// see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes - -#include -#include -#include -#include -#include -#include // only to support hash of smart pointers -#include -#include -#include -#include -#if __cplusplus >= 201703L -# include -#endif - -// #define ROBIN_HOOD_LOG_ENABLED -#ifdef ROBIN_HOOD_LOG_ENABLED -# include -# define ROBIN_HOOD_LOG(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_LOG(x) -#endif - -// #define ROBIN_HOOD_TRACE_ENABLED -#ifdef ROBIN_HOOD_TRACE_ENABLED -# include -# define ROBIN_HOOD_TRACE(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_TRACE(x) -#endif - -// #define ROBIN_HOOD_COUNT_ENABLED -#ifdef ROBIN_HOOD_COUNT_ENABLED -# include -# define ROBIN_HOOD_COUNT(x) ++counts().x; -namespace robin_hood { -struct Counts { - uint64_t shiftUp{}; - uint64_t shiftDown{}; -}; -inline std::ostream& operator<<(std::ostream& os, Counts const& c) { - return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; -} - -static Counts& counts() { - static Counts counts{}; - return counts; -} -} // namespace robin_hood -#else -# define ROBIN_HOOD_COUNT(x) -#endif - -// all non-argument macros should use this facility. 
See -// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ -#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() - -// mark unused members with this macro -#define ROBIN_HOOD_UNUSED(identifier) - -// bitness -#if SIZE_MAX == UINT32_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 -#elif SIZE_MAX == UINT64_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 -#else -# error Unsupported bitness -#endif - -// endianess -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#endif - -// inline -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) -#endif - -// exceptions -#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 -#endif - -// count leading/trailing bits -#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) -# ifdef _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -# else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll -# endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) -# endif -#endif - -// fallthrough -#ifndef __has_cpp_attribute // For backwards compatibility -# define __has_cpp_attribute(x) 0 -#endif -#if __has_cpp_attribute(clang::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() -#endif - -// likely/unlikely -#ifdef _MSC_VER -# define ROBIN_HOOD_LIKELY(condition) condition -# define ROBIN_HOOD_UNLIKELY(condition) condition -#else -# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) -# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) -#endif - -// detect if native wchar_t type is availiable in MSVC -#ifdef _MSC_VER -# ifdef _NATIVE_WCHAR_T_DEFINED -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -#endif - -// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr -#ifdef _MSC_VER -# if _MSC_VER <= 1900 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -#endif - -// workaround missing "is_trivially_copyable" in g++ < 5.0 -// See https://stackoverflow.com/a/31798726/48181 -#if defined(__GNUC__) && __GNUC__ < 5 -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) -#else -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value -#endif - -// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() -#endif - -namespace robin_hood { - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) -# define ROBIN_HOOD_STD std -#else - -// c++11 compatibility layer -namespace ROBIN_HOOD_STD { -template -struct alignment_of - : std::integral_constant::type)> {}; - -template -class integer_sequence { -public: - using value_type = T; - static_assert(std::is_integral::value, "not integral type"); - static constexpr std::size_t size() noexcept { - return sizeof...(Ints); - } -}; -template -using index_sequence = integer_sequence; - -namespace detail_ { -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); - - template - struct IntSeqCombiner; - - template - struct IntSeqCombiner, integer_sequence> { - using TResult = integer_sequence; - }; - - using TResult = - typename IntSeqCombiner::TResult, - typename IntSeqImpl::TResult>::TResult; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; -} // namespace detail_ - -template -using make_integer_sequence = typename detail_::IntSeqImpl::TResult; - -template -using make_index_sequence = make_integer_sequence; - -template -using index_sequence_for = make_index_sequence; - -} // namespace ROBIN_HOOD_STD - -#endif - -namespace detail { - -// make sure we static_cast to the correct type for hash_int -#if ROBIN_HOOD(BITNESS) == 64 -using SizeT = uint64_t; -#else -using SizeT = uint32_t; -#endif - -template -T rotr(T x, unsigned k) { - return (x >> k) | (x << (8U * sizeof(T) - k)); -} - -// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to -// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with -// care! 
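The ROBIN_HOOD(x) dispatcher defined earlier in this header keeps every configuration flag behind a single token-pasting macro. A tiny standalone sketch of the same pattern, using a hypothetical MYLIB prefix rather than anything from this header:

    #include <cstdio>

    // Each flag is defined once as a private function-like macro, and MYLIB(x)
    // pastes the flag name onto that prefix, mirroring ROBIN_HOOD(x).
    #define MYLIB(x) MYLIB_PRIVATE_DEFINITION_##x()
    #define MYLIB_PRIVATE_DEFINITION_BITNESS() 64

    int main() {
    #if MYLIB(BITNESS) == 64
        std::puts("64-bit build");
    #endif
        return 0;
    }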
-template -inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { - return reinterpret_cast(ptr); -} - -template -inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { - return reinterpret_cast(ptr); -} - -// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other -// inlinings more difficult. Throws are also generally the slow path. -template -[[noreturn]] ROBIN_HOOD(NOINLINE) -#if ROBIN_HOOD(HAS_EXCEPTIONS) - void doThrow(Args&&... args) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) - throw E(std::forward(args)...); -} -#else - void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { - abort(); -} -#endif - -template -T* assertNotNull(T* t, Args&&... args) { - if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { - doThrow(std::forward(args)...); - } - return t; -} - -template -inline T unaligned_load(void const* ptr) noexcept { - // using memcpy so we don't get into unaligned load problems. - // compiler should optimize this very well anyways. - T t; - std::memcpy(&t, ptr, sizeof(T)); - return t; -} - -// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, -// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a -// pointer. -template -class BulkPoolAllocator { -public: - BulkPoolAllocator() noexcept = default; - - // does not copy anything, just creates a new allocator. - BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept - : mHead(nullptr) - , mListForFree(nullptr) {} - - BulkPoolAllocator(BulkPoolAllocator&& o) noexcept - : mHead(o.mHead) - , mListForFree(o.mListForFree) { - o.mListForFree = nullptr; - o.mHead = nullptr; - } - - BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { - reset(); - mHead = o.mHead; - mListForFree = o.mListForFree; - o.mListForFree = nullptr; - o.mHead = nullptr; - return *this; - } - - BulkPoolAllocator& - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { - // does not do anything - return *this; - } - - ~BulkPoolAllocator() noexcept { - reset(); - } - - // Deallocates all allocated memory. - void reset() noexcept { - while (mListForFree) { - T* tmp = *mListForFree; - ROBIN_HOOD_LOG("std::free") - std::free(mListForFree); - mListForFree = reinterpret_cast_no_cast_align_warning(tmp); - } - mHead = nullptr; - } - - // allocates, but does NOT initialize. Use in-place new constructor, e.g. - // T* obj = pool.allocate(); - // ::new (static_cast(obj)) T(); - T* allocate() { - T* tmp = mHead; - if (!tmp) { - tmp = performAllocation(); - } - - mHead = *reinterpret_cast_no_cast_align_warning(tmp); - return tmp; - } - - // does not actually deallocate but puts it in store. - // make sure you have already called the destructor! e.g. with - // obj->~T(); - // pool.deallocate(obj); - void deallocate(T* obj) noexcept { - *reinterpret_cast_no_cast_align_warning(obj) = mHead; - mHead = obj; - } - - // Adds an already allocated block of memory to the allocator. This allocator is from now on - // responsible for freeing the data (with free()). If the provided data is not large enough to - // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
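The allocate()/deallocate() contract documented above (raw storage handed out, destructor already run before storage is handed back) is easy to get wrong. A minimal sketch, assuming robin_hood.h is included; BulkPoolAllocator lives in the internal robin_hood::detail namespace and is shown here only for illustration, with Widget as a made-up payload type:

    #include <new>
    #include "robin_hood.h"

    struct Widget { int id; };

    int main() {
        robin_hood::detail::BulkPoolAllocator<Widget, 4, 256> pool;

        Widget* w = pool.allocate();                 // raw, uninitialized storage
        ::new (static_cast<void*>(w)) Widget{42};    // construct in place

        w->~Widget();                                // destroy first...
        pool.deallocate(w);                          // ...then return storage to the free list
        return 0;
    }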
- void addOrFree(void* ptr, const size_t numBytes) noexcept { - // calculate number of available elements in ptr - if (numBytes < ALIGNMENT + ALIGNED_SIZE) { - // not enough data for at least one element. Free and return. - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } else { - ROBIN_HOOD_LOG("add to buffer") - add(ptr, numBytes); - } - } - - void swap(BulkPoolAllocator& other) noexcept { - using std::swap; - swap(mHead, other.mHead); - swap(mListForFree, other.mListForFree); - } - -private: - // iterates the list of allocated memory to calculate how many to alloc next. - // Recalculating this each time saves us a size_t member. - // This ignores the fact that memory blocks might have been added manually with addOrFree. In - // practice, this should not matter much. - ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { - auto tmp = mListForFree; - size_t numAllocs = MinNumAllocs; - - while (numAllocs * 2 <= MaxNumAllocs && tmp) { - auto x = reinterpret_cast(tmp); - tmp = *x; - numAllocs *= 2; - } - - return numAllocs; - } - - // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). - void add(void* ptr, const size_t numBytes) noexcept { - const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; - - auto data = reinterpret_cast(ptr); - - // link free list - auto x = reinterpret_cast(data); - *x = mListForFree; - mListForFree = data; - - // create linked list for newly allocated data - auto* const headT = - reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); - - auto* const head = reinterpret_cast(headT); - - // Visual Studio compiler automatically unrolls this loop, which is pretty cool - for (size_t i = 0; i < numElements; ++i) { - *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = - head + (i + 1) * ALIGNED_SIZE; - } - - // last one points to 0 - *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = - mHead; - mHead = headT; - } - - // Called when no memory is available (mHead == 0). - // Don't inline this slow path. - ROBIN_HOOD(NOINLINE) T* performAllocation() { - size_t const numElementsToAlloc = calcNumElementsToAlloc(); - - // alloc new memory: [prev |T, T, ... T] - size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE - << " * " << numElementsToAlloc) - add(assertNotNull(std::malloc(bytes)), bytes); - return mHead; - } - - // enforce byte alignment of the T's -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) - static constexpr size_t ALIGNMENT = - (std::max)(std::alignment_of::value, std::alignment_of::value); -#else - static const size_t ALIGNMENT = - (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) - ? ROBIN_HOOD_STD::alignment_of::value - : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround -#endif - - static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; - - static_assert(MinNumAllocs >= 1, "MinNumAllocs"); - static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); - static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); - static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); - static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); - - T* mHead{nullptr}; - T** mListForFree{nullptr}; -}; - -template -struct NodeAllocator; - -// dummy allocator that does nothing -template -struct NodeAllocator { - - // we are not using the data, so just free it. 
- void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } -}; - -template -struct NodeAllocator : public BulkPoolAllocator {}; - -// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making -// my own here. -namespace swappable { -#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) -using std::swap; -template -struct nothrow { - static const bool value = noexcept(swap(std::declval(), std::declval())); -}; -#else -template -struct nothrow { - static const bool value = std::is_nothrow_swappable::value; -}; -#endif -} // namespace swappable - -} // namespace detail - -struct is_transparent_tag {}; - -// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, -// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is -// also tested. -template -struct pair { - using first_type = T1; - using second_type = T2; - - template ::value && - std::is_default_constructible::value>::type> - constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) - : first() - , second() {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair const& o) noexcept( - noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) - : first(o.first) - , second(o.second) {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair&& o) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(o.first)) - , second(std::move(o.second)) {} - - constexpr pair(T1&& a, T2&& b) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(a)) - , second(std::move(b)) {} - - template - constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( - std::declval()))) && noexcept(T2(std::forward(std::declval())))) - : first(std::forward(a)) - , second(std::forward(b)) {} - - template - // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" - // if this constructor is constexpr -#if !ROBIN_HOOD(BROKEN_CONSTEXPR) - constexpr -#endif - pair(std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple - b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) - : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) { - } - - // constructor called from the std::piecewise_construct_t ctor - template - pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( - noexcept(T1(std::forward(std::get( - std::declval&>()))...)) && noexcept(T2(std:: - forward(std::get( - std::declval&>()))...))) - : first(std::forward(std::get(a))...) - , second(std::forward(std::get(b))...) { - // make visual studio compiler happy about warning about unused a & b. - // Visual studio's pair implementation disables warning 4100. 
- (void)a; - (void)b; - } - - void swap(pair& o) noexcept((detail::swappable::nothrow::value) && - (detail::swappable::nothrow::value)) { - using std::swap; - swap(first, o.first); - swap(second, o.second); - } - - T1 first; // NOLINT(misc-non-private-member-variables-in-classes) - T2 second; // NOLINT(misc-non-private-member-variables-in-classes) -}; - -template -inline void swap(pair& a, pair& b) noexcept( - noexcept(std::declval&>().swap(std::declval&>()))) { - a.swap(b); -} - -template -inline constexpr bool operator==(pair const& x, pair const& y) { - return (x.first == y.first) && (x.second == y.second); -} -template -inline constexpr bool operator!=(pair const& x, pair const& y) { - return !(x == y); -} -template -inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( - std::declval() < std::declval()) && noexcept(std::declval() < - std::declval())) { - return x.first < y.first || (!(y.first < x.first) && x.second < y.second); -} -template -inline constexpr bool operator>(pair const& x, pair const& y) { - return y < x; -} -template -inline constexpr bool operator<=(pair const& x, pair const& y) { - return !(x > y); -} -template -inline constexpr bool operator>=(pair const& x, pair const& y) { - return !(x < y); -} - -inline size_t hash_bytes(void const* ptr, size_t len) noexcept { - static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - static constexpr uint64_t seed = UINT64_C(0xe17a1465); - static constexpr unsigned int r = 47; - - auto const* const data64 = static_cast(ptr); - uint64_t h = seed ^ (len * m); - - size_t const n_blocks = len / 8; - for (size_t i = 0; i < n_blocks; ++i) { - auto k = detail::unaligned_load(data64 + i); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - auto const* const data8 = reinterpret_cast(data64 + n_blocks); - switch (len & 7U) { - case 7: - h ^= static_cast(data8[6]) << 48U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 6: - h ^= static_cast(data8[5]) << 40U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 5: - h ^= static_cast(data8[4]) << 32U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 4: - h ^= static_cast(data8[3]) << 24U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 3: - h ^= static_cast(data8[2]) << 16U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 2: - h ^= static_cast(data8[1]) << 8U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 1: - h ^= static_cast(data8[0]); - h *= m; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - default: - break; - } - - h ^= h >> r; - - // not doing the final step here, because this will be done by keyToIdx anyways - // h *= m; - // h ^= h >> r; - return static_cast(h); -} - -inline size_t hash_int(uint64_t x) noexcept { - // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, - // and doesn't need any special 128bit operations. - x ^= x >> 33U; - x *= UINT64_C(0xff51afd7ed558ccd); - x ^= x >> 33U; - - // not doing the final step here, because this will be done by keyToIdx anyways - // x *= UINT64_C(0xc4ceb9fe1a85ec53); - // x ^= x >> 33U; - return static_cast(x); -} - -// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
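hash_bytes() and hash_int() above are plain free functions, and the robin_hood::hash wrapper defined next layers this mixing step over std::hash. A short sketch of calling them directly (usage only, assuming robin_hood.h is included):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        size_t h1 = robin_hood::hash_int(UINT64_C(12345));       // murmur-style integer finalizer

        std::string s = "barcode";
        size_t h2 = robin_hood::hash_bytes(s.data(), s.size());  // byte-range hash (string specialization)

        size_t h3 = robin_hood::hash<std::string>{}(s);          // functor form used by the map

        std::printf("%zu %zu %zu\n", h1, h2, h3);
        return 0;
    }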
-template -struct hash : public std::hash { - size_t operator()(T const& obj) const - noexcept(noexcept(std::declval>().operator()(std::declval()))) { - // call base hash - auto result = std::hash::operator()(obj); - // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); - } -}; - -template -struct hash> { - size_t operator()(std::basic_string const& str) const noexcept { - return hash_bytes(str.data(), sizeof(CharT) * str.size()); - } -}; - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -template -struct hash> { - size_t operator()(std::basic_string_view const& sv) const noexcept { - return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); - } -}; -#endif - -template -struct hash { - size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); - } -}; - -template -struct hash> { - size_t operator()(std::unique_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash> { - size_t operator()(std::shared_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash::value>::type> { - size_t operator()(Enum e) const noexcept { - using Underlying = typename std::underlying_type::type; - return hash{}(static_cast(e)); - } -}; - -#define ROBIN_HOOD_HASH_INT(T) \ - template <> \ - struct hash { \ - size_t operator()(T const& obj) const noexcept { \ - return hash_int(static_cast(obj)); \ - } \ - } - -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuseless-cast" -#endif -// see https://en.cppreference.com/w/cpp/utility/hash -ROBIN_HOOD_HASH_INT(bool); -ROBIN_HOOD_HASH_INT(char); -ROBIN_HOOD_HASH_INT(signed char); -ROBIN_HOOD_HASH_INT(unsigned char); -ROBIN_HOOD_HASH_INT(char16_t); -ROBIN_HOOD_HASH_INT(char32_t); -#if ROBIN_HOOD(HAS_NATIVE_WCHART) -ROBIN_HOOD_HASH_INT(wchar_t); -#endif -ROBIN_HOOD_HASH_INT(short); -ROBIN_HOOD_HASH_INT(unsigned short); -ROBIN_HOOD_HASH_INT(int); -ROBIN_HOOD_HASH_INT(unsigned int); -ROBIN_HOOD_HASH_INT(long); -ROBIN_HOOD_HASH_INT(long long); -ROBIN_HOOD_HASH_INT(unsigned long); -ROBIN_HOOD_HASH_INT(unsigned long long); -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif -namespace detail { - -template -struct void_type { - using type = void; -}; - -template -struct has_is_transparent : public std::false_type {}; - -template -struct has_is_transparent::type> - : public std::true_type {}; - -// using wrapper classes for hash and key_equal prevents the diamond problem when the same type -// is used. see https://stackoverflow.com/a/28771920/48181 -template -struct WrapHash : public T { - WrapHash() = default; - explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -template -struct WrapKeyEqual : public T { - WrapKeyEqual() = default; - explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -// A highly optimized hashmap implementation, using the Robin Hood algorithm. -// -// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but -// be about 2x faster in most cases and require much less allocations. -// -// This implementation uses the following memory layout: -// -// [Node, Node, ... Node | info, info, ... infoSentinel ] -// -// * Node: either a DataNode that directly has the std::pair as member, -// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use -// depends on how fast the swap() operation is. Heuristically, this is automatically choosen -// based on sizeof(). there are always 2^n Nodes. -// -// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. -// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the -// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it -// actually belongs to the previous position and was pushed out because that place is already -// taken. -// -// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the -// need for a idx variable. -// -// According to STL, order of templates has effect on throughput. That's why I've moved the -// boolean to the front. -// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ -template -class Table - : public WrapHash, - public WrapKeyEqual, - detail::NodeAllocator< - typename std::conditional< - std::is_void::value, Key, - robin_hood::pair::type, T>>::type, - 4, 16384, IsFlat> { -public: - static constexpr bool is_flat = IsFlat; - static constexpr bool is_map = !std::is_void::value; - static constexpr bool is_set = !is_map; - static constexpr bool is_transparent = - has_is_transparent::value && has_is_transparent::value; - - using key_type = Key; - using mapped_type = T; - using value_type = typename std::conditional< - is_set, Key, - robin_hood::pair::type, T>>::type; - using size_type = size_t; - using hasher = Hash; - using key_equal = KeyEqual; - using Self = Table; - -private: - static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, - "MaxLoadFactor100 needs to be >10 && < 100"); - - using WHash = WrapHash; - using WKeyEqual = WrapKeyEqual; - - // configuration defaults - - // make sure we have 8 elements, needed to quickly rehash mInfo - static constexpr size_t InitialNumElements = sizeof(uint64_t); - static constexpr uint32_t InitialInfoNumBits = 5; - static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr size_t InfoMask = InitialInfoInc - 1U; - static constexpr uint8_t InitialInfoHashShift = 0; - using DataPool = detail::NodeAllocator; - - // type needs to be wider than uint8_t. - using InfoType = uint32_t; - - // DataNode //////////////////////////////////////////////////////// - - // Primary template for the data node. We have special implementations for small and big - // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these - // on the heap so swap merely swaps a pointer. - template - class DataNode {}; - - // Small: just allocate on the stack. - template - class DataNode final { - public: - template - explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( - noexcept(value_type(std::forward(args)...))) - : mData(std::forward(args)...) 
{} - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( - std::is_nothrow_move_constructible::value) - : mData(std::move(n.mData)) {} - - // doesn't do anything - void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} - void destroyDoNotDeallocate() noexcept {} - - value_type const* operator->() const noexcept { - return &mData; - } - value_type* operator->() noexcept { - return &mData; - } - - const value_type& operator*() const noexcept { - return mData; - } - - value_type& operator*() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData.second; - } - - void swap(DataNode& o) noexcept( - noexcept(std::declval().swap(std::declval()))) { - mData.swap(o.mData); - } - - private: - value_type mData; - }; - - // big object: allocate on heap. - template - class DataNode { - public: - template - explicit DataNode(M& map, Args&&... args) - : mData(map.allocate()) { - ::new (static_cast(mData)) value_type(std::forward(args)...); - } - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept - : mData(std::move(n.mData)) {} - - void destroy(M& map) noexcept { - // don't deallocate, just put it into list of datapool. - mData->~value_type(); - map.deallocate(mData); - } - - void destroyDoNotDeallocate() noexcept { - mData->~value_type(); - } - - value_type const* operator->() const noexcept { - return mData; - } - - value_type* operator->() noexcept { - return mData; - } - - const value_type& operator*() const { - return *mData; - } - - value_type& operator*() { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData->second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData->second; - } - - void swap(DataNode& o) noexcept { - using std::swap; - swap(mData, o.mData); - } - - private: - value_type* mData; - }; - - using Node = DataNode; - - // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) - ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { - return n.getFirst(); - } - - // in case we have void mapped_type, we are not using a pair, thus we just route k through. - // No need to disable this because it's just not used if not applicable. 
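The two DataNode specializations above are what separate the flat (in-table) and node (heap-allocated) variants selected by the aliases at the end of the header. A small sketch of choosing between them, where BigValue is just an illustrative type large enough that the auto-selecting robin_hood::unordered_map falls back to node storage:

    #include <array>
    #include <string>
    #include "robin_hood.h"

    struct BigValue {
        std::array<char, 256> payload{};
    };

    int main() {
        // values stored directly in the table; fastest, but element addresses
        // are not stable across a rehash
        robin_hood::unordered_flat_map<std::string, int> flat;

        // values allocated from the node pool; rehashing only shuffles pointers
        robin_hood::unordered_node_map<std::string, BigValue> nodes;

        // picks flat or node storage from sizeof(pair) and nothrow-movability
        robin_hood::unordered_map<std::string, BigValue> automatic;

        flat["small"] = 1;
        nodes["big"] = BigValue{};
        automatic["auto"] = BigValue{};
        return 0;
    }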
- ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { - return k; - } - - // in case we have non-void mapped_type, we have a standard robin_hood::pair - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, key_type const&>::type - getFirstConst(value_type const& vt) const noexcept { - return vt.first; - } - - // Cloner ////////////////////////////////////////////////////////// - - template - struct Cloner; - - // fast path: Just copy data, without allocating anything. - template - struct Cloner { - void operator()(M const& source, M& target) const { - auto const* const src = reinterpret_cast(source.mKeyVals); - auto* tgt = reinterpret_cast(target.mKeyVals); - auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); - std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); - } - }; - - template - struct Cloner { - void operator()(M const& s, M& t) const { - auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); - std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); - - for (size_t i = 0; i < numElementsWithBuffer; ++i) { - if (t.mInfo[i]) { - ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); - } - } - } - }; - - // Destroyer /////////////////////////////////////////////////////// - - template - struct Destroyer {}; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - } - }; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroy(m); - n.~Node(); - } - } - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroyDoNotDeallocate(); - n.~Node(); - } - } - } - }; - - // Iter //////////////////////////////////////////////////////////// - - struct fast_forward_tag {}; - - // generic iterator for both const_iterator and iterator. - template - // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) - class Iter { - private: - using NodePtr = typename std::conditional::type; - - public: - using difference_type = std::ptrdiff_t; - using value_type = typename Self::value_type; - using reference = typename std::conditional::type; - using pointer = typename std::conditional::type; - using iterator_category = std::forward_iterator_tag; - - // default constructed iterator can be compared to itself, but WON'T return true when - // compared to end(). - Iter() = default; - - // Rule of zero: nothing specified. The conversion constructor is only enabled for - // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. - - // Conversion constructor from iterator to const_iterator. 
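The Iter class being defined here is a plain forward iterator over the occupied slots. A short usage sketch of iteration and lookup, assuming the flat map alias from the end of the header:

    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> m;
        m.insert({{"a", 1}, {"b", 2}});

        // forward iteration only; visiting order is unspecified
        for (auto const& kv : m) {
            std::printf("%s -> %d\n", kv.first.c_str(), kv.second);
        }

        // find() returns end() when the key is absent
        auto it = m.find("c");
        return it == m.end() ? 0 : 1;
    }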
- template ::type> - // NOLINTNEXTLINE(hicpp-explicit-conversions) - Iter(Iter const& other) noexcept - : mKeyVals(other.mKeyVals) - , mInfo(other.mInfo) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr, - fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) { - fastForward(); - } - - template ::type> - Iter& operator=(Iter const& other) noexcept { - mKeyVals = other.mKeyVals; - mInfo = other.mInfo; - return *this; - } - - // prefix increment. Undefined behavior if we are at end()! - Iter& operator++() noexcept { - mInfo++; - mKeyVals++; - fastForward(); - return *this; - } - - Iter operator++(int) noexcept { - Iter tmp = *this; - ++(*this); - return tmp; - } - - reference operator*() const { - return **mKeyVals; - } - - pointer operator->() const { - return &**mKeyVals; - } - - template - bool operator==(Iter const& o) const noexcept { - return mKeyVals == o.mKeyVals; - } - - template - bool operator!=(Iter const& o) const noexcept { - return mKeyVals != o.mKeyVals; - } - - private: - // fast forward to the next non-free info byte - // I've tried a few variants that don't depend on intrinsics, but unfortunately they are - // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. - void fastForward() noexcept { - size_t n = 0; - while (0U == (n = detail::unaligned_load(mInfo))) { - mInfo += sizeof(size_t); - mKeyVals += sizeof(size_t); - } -#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) - // we know for certain that within the next 8 bytes we'll find a non-zero one. - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 4; - mKeyVals += 4; - } - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 2; - mKeyVals += 2; - } - if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { - mInfo += 1; - mKeyVals += 1; - } -#else -# if ROBIN_HOOD(LITTLE_ENDIAN) - auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; -# else - auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; -# endif - mInfo += inc; - mKeyVals += inc; -#endif - } - - friend class Table; - NodePtr mKeyVals{nullptr}; - uint8_t const* mInfo{nullptr}; - }; - - //////////////////////////////////////////////////////////////////// - - // highly performance relevant code. - // Lower bits are used for indexing into the array (2^n size) - // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. - template - void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // In addition to whatever hash is used, add another mul & shift so we get better hashing. - // This serves as a bad hash prevention, if the given data is - // badly mixed. - auto h = static_cast(WHash::operator()(key)); - - h *= mHashMultiplier; - h ^= h >> 33U; - - // the lower InitialInfoNumBits are reserved for info. - *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); - *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; - } - - // forwards the index by one, wrapping around at the end - void next(InfoType* info, size_t* idx) const noexcept { - *idx = *idx + 1; - *info += mInfoInc; - } - - void nextWhileLess(InfoType* info, size_t* idx) const noexcept { - // unrolling this by hand did not bring any speedups. - while (*info < mInfo[*idx]) { - next(info, idx); - } - } - - // Shift everything up by one element. Tries to move stuff around. 
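// [Editorial aside — a minimal sketch, not part of the patch] keyToIdx above re-mixes the hash
// with mHashMultiplier and an xor-shift, then splits it: the lowest InitialInfoNumBits seed the
// per-bucket "info" byte and the remaining bits, masked by mMask, pick the bucket. A simplified
// stand-alone version; the 5-bit info width and the "+ 1" offset (so that 0 can mean "empty")
// are illustrative assumptions, not the library's exact configuration:
#include <cstddef>
#include <cstdint>

void key_to_idx(uint64_t h, std::size_t mask, std::size_t* idx, uint8_t* info) {
    h *= UINT64_C(0xc4ceb9fe1a85ec53);                              // extra mixing, as with mHashMultiplier
    h ^= h >> 33U;
    const unsigned info_bits = 5;                                   // assumed InitialInfoNumBits
    *info = 1 + static_cast<uint8_t>(h & ((1U << info_bits) - 1));  // low bits -> info byte
    *idx = (static_cast<std::size_t>(h) >> info_bits) & mask;       // remaining bits -> bucket index
}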
- void - shiftUp(size_t startIdx, - size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { - auto idx = startIdx; - ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); - while (--idx != insertion_idx) { - mKeyVals[idx] = std::move(mKeyVals[idx - 1]); - } - - idx = startIdx; - while (idx != insertion_idx) { - ROBIN_HOOD_COUNT(shiftUp) - mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); - if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - --idx; - } - } - - void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { - // until we find one that is either empty or has zero offset. - // TODO(martinus) we don't need to move everything, just the last one for the same - // bucket. - mKeyVals[idx].destroy(*this); - - // until we find one that is either empty or has zero offset. - while (mInfo[idx + 1] >= 2 * mInfoInc) { - ROBIN_HOOD_COUNT(shiftDown) - mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); - mKeyVals[idx] = std::move(mKeyVals[idx + 1]); - ++idx; - } - - mInfo[idx] = 0; - // don't destroy, we've moved it - // mKeyVals[idx].destroy(*this); - mKeyVals[idx].~Node(); - } - - // copy of find(), except that it returns iterator instead of const_iterator. - template - ROBIN_HOOD(NODISCARD) - size_t findIdx(Other const& key) const { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - do { - // unrolling this twice gives a bit of a speedup. More unrolling did not help. - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found! - return mMask == 0 ? 0 - : static_cast(std::distance( - mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); - } - - void cloneData(const Table& o) { - Cloner()(o, *this); - } - - // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return True on success, false if something went wrong - void insert_move(Node&& keyval) { - // we don't retry, fail if overflowing - // don't need to check max num elements - if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); - } - - size_t idx{}; - InfoType info{}; - keyToIdx(keyval.getFirst(), &idx, &info); - - // skip forward. Use <= because we are certain that the element is not there. - while (info <= mInfo[idx]) { - idx = idx + 1; - info += mInfoInc; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = static_cast(info); - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(std::move(keyval)); - } else { - shiftUp(idx, insertion_idx); - l = std::move(keyval); - } - - // put at empty spot - mInfo[insertion_idx] = insertion_info; - - ++mNumElements; - } - -public: - using iterator = Iter; - using const_iterator = Iter; - - Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) - : WHash() - , WKeyEqual() { - ROBIN_HOOD_TRACE(this) - } - - // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
- // This tremendously speeds up ctor & dtor of a map that never receives an element. The - // penalty is payed at the first insert, and not before. Lookup of this empty map works - // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the - // standard, but we can ignore it. - explicit Table( - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - } - - template - Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, - const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(first, last); - } - - Table(std::initializer_list initlist, - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(initlist.begin(), initlist.end()); - } - - Table(Table&& o) noexcept - : WHash(std::move(static_cast(o))) - , WKeyEqual(std::move(static_cast(o))) - , DataPool(std::move(static_cast(o))) { - ROBIN_HOOD_TRACE(this) - if (o.mMask) { - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - // set other's mask to 0 so its destructor won't do anything - o.init(); - } - } - - Table& operator=(Table&& o) noexcept { - ROBIN_HOOD_TRACE(this) - if (&o != this) { - if (o.mMask) { - // only move stuff if the other map actually has some data - destroy(); - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - WHash::operator=(std::move(static_cast(o))); - WKeyEqual::operator=(std::move(static_cast(o))); - DataPool::operator=(std::move(static_cast(o))); - - o.init(); - - } else { - // nothing in the other map => just clear us. - clear(); - } - } - return *this; - } - - Table(const Table& o) - : WHash(static_cast(o)) - , WKeyEqual(static_cast(o)) - , DataPool(static_cast(o)) { - ROBIN_HOOD_TRACE(this) - if (!o.empty()) { - // not empty: create an exact copy. it is also possible to just iterate through all - // elements and insert them, but copying is probably faster. - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mHashMultiplier = o.mHashMultiplier; - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - // no need for calloc because clonData does memcpy - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - } - } - - // Creates a copy of the given map. Copy constructor of each entry is used. 
- // Not sure why clang-tidy thinks this doesn't handle self assignment, it does - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - Table& operator=(Table const& o) { - ROBIN_HOOD_TRACE(this) - if (&o == this) { - // prevent assigning of itself - return *this; - } - - // we keep using the old allocator and not assign the new one, because we want to keep - // the memory available. when it is the same size. - if (o.empty()) { - if (0 == mMask) { - // nothing to do, we are empty too - return *this; - } - - // not empty: destroy what we have there - // clear also resets mInfo to 0, that's sometimes not necessary. - destroy(); - init(); - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - - return *this; - } - - // clean up old stuff - Destroyer::value>{}.nodes(*this); - - if (mMask != o.mMask) { - // no luck: we don't have the same array size allocated, so we need to realloc. - if (0 != mMask) { - // only deallocate if we actually have data! - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - - // no need for calloc here because cloneData performs a memcpy. - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - // sentinel is set in cloneData - } - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - mHashMultiplier = o.mHashMultiplier; - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - - return *this; - } - - // Swaps everything between the two maps. - void swap(Table& o) { - ROBIN_HOOD_TRACE(this) - using std::swap; - swap(o, *this); - } - - // Clears all data, without resizing. - void clear() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - // don't do anything! also important because we don't want to write to - // DummyInfoByte::b, even though we would just write 0 to it. - return; - } - - Destroyer::value>{}.nodes(*this); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - // clear everything, then set the sentinel again - uint8_t const z = 0; - std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // Destroys the map and all it's contents. - ~Table() { - ROBIN_HOOD_TRACE(this) - destroy(); - } - - // Checks if both tables contain the same entries. Order is irrelevant. 
- bool operator==(const Table& other) const { - ROBIN_HOOD_TRACE(this) - if (other.size() != size()) { - return false; - } - for (auto const& otherEntry : other) { - if (!has(otherEntry)) { - return false; - } - } - - return true; - } - - bool operator!=(const Table& other) const { - ROBIN_HOOD_TRACE(this) - return !operator==(other); - } - - template - typename std::enable_if::value, Q&>::type operator[](const key_type& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - typename std::enable_if::value, Q&>::type operator[](key_type&& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - void insert(Iter first, Iter last) { - for (; first != last; ++first) { - // value_type ctor needed because this might be called with std::pair's - insert(value_type(*first)); - } - } - - void insert(std::initializer_list ilist) { - for (auto&& vt : ilist) { - insert(std::move(vt)); - } - } - - template - std::pair emplace(Args&&... args) { - ROBIN_HOOD_TRACE(this) - Node n{*this, std::forward(args)...}; - auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); - switch (idxAndState.second) { - case InsertionState::key_found: - n.destroy(*this); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = std::move(n); - break; - - case InsertionState::overflow_error: - n.destroy(*this); - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - iterator emplace_hint(const_iterator position, Args&&... args) { - (void)position; - return emplace(std::forward(args)...).first; - } - - template - std::pair try_emplace(const key_type& key, Args&&... args) { - return try_emplace_impl(key, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& key, Args&&... args) { - return try_emplace_impl(std::move(key), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { - (void)hint; - return try_emplace_impl(key, std::forward(args)...).first; - } - - template - iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { - (void)hint; - return try_emplace_impl(std::move(key), std::forward(args)...).first; - } - - template - std::pair insert_or_assign(const key_type& key, Mapped&& obj) { - return insertOrAssignImpl(key, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& key, Mapped&& obj) { - return insertOrAssignImpl(std::move(key), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(key, std::forward(obj)).first; - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(std::move(key), std::forward(obj)).first; - } - - std::pair insert(const value_type& keyval) { - ROBIN_HOOD_TRACE(this) - return emplace(keyval); - } - - iterator insert(const_iterator hint, const value_type& keyval) { - (void)hint; - return emplace(keyval).first; - } - - std::pair insert(value_type&& keyval) { - return emplace(std::move(keyval)); - } - - iterator insert(const_iterator hint, value_type&& keyval) { - (void)hint; - return emplace(std::move(keyval)).first; - } - - // Returns 1 if key is found, 0 otherwise. - size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type count(const OtherKey& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - return 1U == count(key); - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type contains(const OtherKey& key) const { - return 1U == count(key); - } - - // Returns a reference to the value found for key. - // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q&>::type at(key_type const& key) { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - // Returns a reference to the value found for key. 
- // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q const&>::type at(key_type const& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type // NOLINT(modernize-use-nodiscard) - find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator find(const key_type& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type find(const OtherKey& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator begin() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - return end(); - } - return iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - const_iterator begin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cbegin(); - } - const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - if (empty()) { - return cend(); - } - return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - - iterator end() { - ROBIN_HOOD_TRACE(this) - // no need to supply valid info pointer: end() must not be dereferenced, and only node - // pointer is compared. - return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - const_iterator end() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cend(); - } - const_iterator cend() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - - iterator erase(const_iterator pos) { - ROBIN_HOOD_TRACE(this) - // its safe to perform const cast here - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); - } - - // Erases element at pos, returns iterator to the next element. - iterator erase(iterator pos) { - ROBIN_HOOD_TRACE(this) - // we assume that pos always points to a valid entry, and not end(). 
- auto const idx = static_cast(pos.mKeyVals - mKeyVals); - - shiftDown(idx); - --mNumElements; - - if (*pos.mInfo) { - // we've backward shifted, return this again - return pos; - } - - // no backward shift, return next element - return ++pos; - } - - size_t erase(const key_type& key) { - ROBIN_HOOD_TRACE(this) - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - // check while info matches with the source idx - do { - if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - shiftDown(idx); - --mNumElements; - return 1; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found to delete - return 0; - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // exactly the same as reserve(c). - void rehash(size_t c) { - // forces a reserve - reserve(c, true); - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. - void reserve(size_t c) { - // reserve, but don't force rehash - reserve(c, false); - } - - // If possible reallocates the map to a smaller one. This frees the underlying table. - // Does not do anything if load_factor is too large for decreasing the table's size. - void compact() { - ROBIN_HOOD_TRACE(this) - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (newSize < mMask + 1) { - rehashPowerOfTwo(newSize, true); - } - } - - size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return mNumElements; - } - - size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(-1); - } - - ROBIN_HOOD(NODISCARD) bool empty() const noexcept { - ROBIN_HOOD_TRACE(this) - return 0 == mNumElements; - } - - float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return MaxLoadFactor100 / 100.0F; - } - - // Average number of elements per bucket. Since we allow only 1 per bucket - float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(size()) / static_cast(mMask + 1); - } - - ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { - ROBIN_HOOD_TRACE(this) - return mMask; - } - - ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { - if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { - return maxElements * MaxLoadFactor100 / 100; - } - - // we might be a bit inprecise, but since maxElements is quite large that doesn't matter - return (maxElements / 100) * MaxLoadFactor100; - } - - ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { - // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load - // 64bit types. 
- return numElements + sizeof(uint64_t); - } - - ROBIN_HOOD(NODISCARD) - size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { - auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); - return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); - } - - // calculation only allowed for 2^n values - ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { -#if ROBIN_HOOD(BITNESS) == 64 - return numElements * sizeof(Node) + calcNumBytesInfo(numElements); -#else - // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. - auto const ne = static_cast(numElements); - auto const s = static_cast(sizeof(Node)); - auto const infos = static_cast(calcNumBytesInfo(numElements)); - - auto const total64 = ne * s + infos; - auto const total = static_cast(total64); - - if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { - throwOverflowError(); - } - return total; -#endif - } - -private: - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - auto it = find(e.first); - return it != end() && it->second == e.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - return find(e) != end(); - } - - void reserve(size_t c, bool forceRehash) { - ROBIN_HOOD_TRACE(this) - auto const minElementsAllowed = (std::max)(c, mNumElements); - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (forceRehash || newSize > mMask + 1) { - rehashPowerOfTwo(newSize, false); - } - } - - // reserves space for at least the specified number of elements. - // only works if numBuckets if power of two - // True on success, false otherwise - void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { - ROBIN_HOOD_TRACE(this) - - Node* const oldKeyVals = mKeyVals; - uint8_t const* const oldInfo = mInfo; - - const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - // resize operation: move stuff - initData(numBuckets); - if (oldMaxElementsWithBuffer > 1) { - for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { - if (oldInfo[i] != 0) { - // might throw an exception, which is really bad since we are in the middle of - // moving stuff. - insert_move(std::move(oldKeyVals[i])); - // destroy the node but DON'T destroy the data. - oldKeyVals[i].~Node(); - } - } - - // this check is not necessary as it's guarded by the previous if, but it helps - // silence g++'s overeager "attempt to free a non-heap object 'map' - // [-Werror=free-nonheap-object]" warning. 
- if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - // don't destroy old data: put it into the pool instead - if (forceFree) { - std::free(oldKeyVals); - } else { - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); - } - } - } - } - - ROBIN_HOOD(NOINLINE) void throwOverflowError() const { -#if ROBIN_HOOD(HAS_EXCEPTIONS) - throw std::overflow_error("robin_hood::map overflow"); -#else - abort(); -#endif - } - - template - std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - mKeyVals[idxAndState.first].getSecond() = std::forward(obj); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - void initData(size_t max_elements) { - mNumElements = 0; - mMask = max_elements - 1; - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); - - // malloc & zero mInfo. Faster than calloc everything. - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = reinterpret_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); - - // set sentinel - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; - - // Finds key, and if not already present prepares a spot where to pot the key & value. - // This potentially shifts nodes out of the way, updates mInfo and number of inserted - // elements, so the only operation left to do is create/assign a new node at that spot. 
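// [Editorial aside — a minimal sketch, not part of the patch] insertKeyPrepareEmptySpot below
// walks forward while the stored info byte is larger (nextWhileLess), checks equal-info slots
// for an existing key, and otherwise shifts the displaced tail up one slot (shiftUp) so entries
// stay ordered by probe distance. A heavily simplified free-standing version of that
// displacement idea, with no info compression, growth or overflow handling (assumes a
// power-of-two table that never fills up and uses the key itself as a toy hash):
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

void robin_hood_insert(std::vector<int64_t>& keys, std::vector<uint8_t>& dist, int64_t key) {
    const std::size_t mask = keys.size() - 1;
    std::size_t idx = static_cast<std::size_t>(key) & mask;
    uint8_t d = 1;                                                   // probe distance; 0 marks an empty slot
    while (true) {
        if (dist[idx] == 0) { keys[idx] = key; dist[idx] = d; return; }  // empty: take it
        if (dist[idx] == d && keys[idx] == key) { return; }              // already present
        if (dist[idx] < d) {                                             // "richer" entry: displace it
            std::swap(keys[idx], key);
            std::swap(dist[idx], d);
        }
        idx = (idx + 1) & mask;                                          // keep probing forward
        ++d;
    }
}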
- template - std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { - for (int i = 0; i < 256; ++i) { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - nextWhileLess(&info, &idx); - - // while we potentially have a match - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do NOT insert. - // see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(idx, InsertionState::key_found); - } - next(&info, &idx); - } - - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - if (!increase_size()) { - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - continue; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - if (idx != insertion_idx) { - shiftUp(idx, insertion_idx); - } - // put at empty spot - mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(insertion_idx, idx == insertion_idx - ? InsertionState::new_node - : InsertionState::overwrite_node); - } - - // enough attempts failed, so finally give up. - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - - bool try_increase_info() { - ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements - << ", maxNumElementsAllowed=" - << calcMaxNumElementsAllowed(mMask + 1)) - if (mInfoInc <= 2) { - // need to be > 2 so that shift works (otherwise undefined behavior!) - return false; - } - // we got space left, try to make info smaller - mInfoInc = static_cast(mInfoInc >> 1U); - - // remove one bit of the hash, leaving more space for the distance info. - // This is extremely fast because we can operate on 8 bytes at once. - ++mInfoHashShift; - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - for (size_t i = 0; i < numElementsWithBuffer; i += 8) { - auto val = unaligned_load(mInfo + i); - val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); - std::memcpy(mInfo + i, &val, sizeof(val)); - } - // update sentinel, which might have been cleared out! - mInfo[numElementsWithBuffer] = 1; - - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - return true; - } - - // True if resize was possible, false otherwise - bool increase_size() { - // nothing allocated yet? just allocate InitialNumElements - if (0 == mMask) { - initData(InitialNumElements); - return true; - } - - auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return true; - } - - ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" - << maxNumElementsAllowed << ", load=" - << (static_cast(mNumElements) * 100.0 / - (static_cast(mMask) + 1))) - - if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - // we have to resize, even though there would still be plenty of space left! - // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case - // we have to rehash a few times - nextHashMultiplier(); - rehashPowerOfTwo(mMask + 1, true); - } else { - // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
- rehashPowerOfTwo((mMask + 1) * 2, false); - } - return true; - } - - void nextHashMultiplier() { - // adding an *even* number, so that the multiplier will always stay odd. This is necessary - // so that the hash stays a mixing function (and thus doesn't have any information loss). - mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); - } - - void destroy() { - if (0 == mMask) { - // don't deallocate! - return; - } - - Destroyer::value>{} - .nodesDoNotDeallocate(*this); - - // This protection against not deleting mMask shouldn't be needed as it's sufficiently - // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object 'fm' - // [-Werror=free-nonheap-object] - if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - } - - void init() noexcept { - mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); - mInfo = reinterpret_cast(&mMask); - mNumElements = 0; - mMask = 0; - mMaxNumElementsAllowed = 0; - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // members are sorted so no padding occurs - uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 - Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 - size_t mNumElements = 0; // 8 byte 32 - size_t mMask = 0; // 8 byte 40 - size_t mMaxNumElementsAllowed = 0; // 8 byte 48 - InfoType mInfoInc = InitialInfoInc; // 4 byte 52 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 - // 16 byte 56 if NodeAllocator -}; - -} // namespace detail - -// map - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_flat_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_node_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_map = - detail::Table) <= sizeof(size_t) * 6 && - std::is_nothrow_move_constructible>::value && - std::is_nothrow_move_assignable>::value, - MaxLoadFactor100, Key, T, Hash, KeyEqual>; - -// set - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_flat_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_node_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_set = detail::Table::value && - std::is_nothrow_move_assignable::value, - MaxLoadFactor100, Key, void, Hash, KeyEqual>; - -} // namespace robin_hood - -#endif From 86bd1bca048166129a9f3e881365d939531f19b0 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:44:50 -0800 Subject: [PATCH 23/49] style add parentheses --- src/bustools_correct.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 9c1c12b..bfeec77 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -367,7 +367,7 @@ void bustools_split_correct(Bustools_opt &opt) if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; @@ -574,7 +574,7 @@ void bustools_correct(Bustools_opt 
&opt) uint64_t b_corrected = (ub << (2 * bc2)) | lbc; if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; @@ -590,7 +590,7 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ubc << (2 * bc2)) | lb; if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; From ae96146f05d7b27906c6da59a6f8c7614da53fc4 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Sun, 5 Mar 2023 12:08:55 -0800 Subject: [PATCH 24/49] try again to undo bitmap --- src/Common.cpp | 82 ++++++++++++++++++++++++------------------ src/Common.hpp | 4 +-- src/bustools_count.cpp | 6 ++-- 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 9a95e54..6e0d89f 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -86,35 +86,58 @@ int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::ve return ecs[0]; // no work } - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); - u = Roaring(ecmap[ecs[0]].size(), data); + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); - u &= Roaring(ecmap[ecs[i]].size(), data); + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -192,7 +215,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool 
assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - Roaring u; // final list of transcripts + std::vector u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -222,9 +245,11 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.add(t); + u.push_back(t); } } + std::sort(u.begin(), u.end()); + u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -235,15 +260,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } @@ -272,13 +291,14 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.add(t); + u.push_back(t); } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } + std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -287,15 +307,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 4b290a3..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -185,12 +185,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_ EcMapInv; +typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e2a63a7..e0d125f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,8 +32,7 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); - ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); + ecmapinv.insert({ecmap[ec], ec}); } std::vector> ec2genes; 
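// [Editorial aside — a minimal sketch, not part of the patch] With the Roaring bitmaps undone,
// intersect_ecs in Common.cpp above intersects the sorted transcript-id vectors in place with a
// two-pointer scan (the j/k/l loop with the stated invariant). The same idea as a tiny
// stand-alone helper; intersect_in_place is an illustrative name:
#include <cstddef>
#include <cstdint>
#include <vector>

// keep only the elements of u that also occur in v; both vectors must be sorted ascending
void intersect_in_place(std::vector<int32_t>& u, const std::vector<int32_t>& v) {
    std::size_t j = 0, k = 0, l = 0;
    while (j < u.size() && l < v.size()) {
        if (u[j] < v[l]) { ++j; }
        else if (u[j] > v[l]) { ++l; }
        else { u[k++] = u[j++]; ++l; }   // match: keep it
    }
    u.resize(k);                          // u[0..k) is now the intersection
}
// Applying this pairwise over all equivalence classes of a record reproduces the core of the
// loop shown above, without any bitmap conversions.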
create_ec2genes(ecmap, genemap, ec2genes); @@ -88,7 +87,8 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - Roaring u; + std::vector u; + u.reserve(100); std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { From eb58f13e59c384ea07dbd81b62f93b5262f40848 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Tue, 14 Mar 2023 17:47:49 +0000 Subject: [PATCH 25/49] adds multicore sorting --- src/bustools_sort.cpp | 507 ++++++++++++++++++++++++++++-------------- 1 file changed, 335 insertions(+), 172 deletions(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index 0f2323c..046f265 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -4,11 +4,14 @@ #include #include #include +#include #include "Common.hpp" #include "BUSData.h" #include "bustools_sort.h" +#include + #define TP std::pair //This code is for automatically creating the tmp directory supplied if it doesn't exist @@ -16,9 +19,61 @@ //#include //once filesystem is acceptable for minGW, switch to that #include "windows.h" //Needed for CreateDirectory + + void EnsureWindowsTempDirectoryExists(const Bustools_opt &opt) { + //Make sure to create the tmp directory if it doesn't exist - writing temporary files fails otherwise in Windows + //First get the directory - in theory, opt.temp_files can look like "tmp/x_" or just "x_" (or even nothing) + //so we should find the last slash and make sure that directory exists + + std::size_t ind = opt.temp_files.rfind('/'); + std::size_t ind2 = opt.temp_files.rfind('\\'); + if (ind == std::string::npos) + { + ind = ind2; + } + else if (ind2 != std::string::npos) + { + //both valid, take the largest value (representing the last slash) + ind = std::max(ind, ind2); + } + if (ind != std::string::npos) + { + auto dirName = opt.temp_files.substr(0, ind); + //When our MinGW builds support c++17, change to std::filesystem + + //std::filesystem::path filepath = dirName; + //if (!std::filesystem::is_directory(filepath)) + //{ + // std::filesystem::create_directory(filepath); + //} + CreateDirectory(dirName.c_str(), NULL); //This will do nothing if the directory exists already + } + } + + //There is a bug in Windows, where bustools sort fails. The problem is that + //gcount for some reason fails here if too much is read and returns 0, even though + //it succeeds. Could perhaps be a 32 bit issue somewhere, does size_t become 32 bits? + //Anyway, this is a workaround that fixes the issue - does the same as the flag -m 100000000. + //An interesting observation is that opt.max_memory is set to 1 << 32, which will become exactly + //zero if truncated to 32 bits... + size_t WindowsMaxMemory(size_t mem) { + + const size_t win_mem_max = 1e8; + if (mem > win_mem_max) { + mem = win_mem_max; + } + return mem; + } +#else + void EnsureWindowsTempDirectoryExists(const Bustools_opt &opt) {} + size_t WindowsMaxMemory(size_t mem) {return mem;} + #endif + + + inline bool cmp1(const BUSData &a, const BUSData &b) { if (a.barcode == b.barcode) @@ -328,22 +383,119 @@ inline bool ncmp5(const TP &a, const TP &b) } }; + +void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const BUSData &, const BUSData &)) { + //std::sort(busdata, busdata + N, cmp); + if (t > 1 && N > 100000) { + const size_t s = 256; + std::vector samples, pivots; + + // samples = drawn from 0, s, 2s, ... 
, t*s + samples.reserve(s*t); + for (int i = 0; i < s*t; ++i) { + samples.push_back(busdata[i * (N / (s*t))]); + } + std::sort(samples.begin(), samples.end(), cmp); + + pivots.reserve(t-1); + // piviots are samples s, 2s, ... , (t-1)*s + for (int i = 1; i < t; ++i) { + pivots.push_back(samples[i * s]); + + //std::cerr << "pivot " << i << " = " << binaryToString(pivots[i-1].barcode, 16) << std::endl; + } + + // buckets are locations of pivots after partitioning + // partition i is between buckets[i] and buckets[i+1] + std::vector buckets(t+1, 0); + buckets[0] = 0; + buckets[t] = N; + + + double partition_time = 0; + clock_t start, end; + start = clock(); + for (int i = 0; i < t-1; i++) { + BUSData p = pivots[i]; + //std::cerr << "partitioning around " << binaryToString(p.barcode, 16) << std::endl; + auto mid = std::partition(busdata + buckets[i], busdata + N, [&p, &cmp](const BUSData &a) { return cmp(a, p); }) - busdata; + buckets[i+1] = mid; + //std::cerr << "bucket " << i << " has " << buckets[i+1] - buckets[i] << " elements, mid = " << mid << std::endl; + } + //std::cerr << "bucket " << t-1 << " has " << buckets[t] - buckets[t-1] << " elements" << std::endl; + + // verify that the pivots are sorted + for (int i = 0; i < t-2; i++) { + if (!cmp(pivots[i], pivots[i+1])) { + std::cerr << "pivot " << i << " is not smaller than pivot " << i+1 << std::endl; + exit(1); + } + } + + //verify that each partition is smaller than the pivot + for (int i = 0; i < t; i++) { + for (size_t j = buckets[i]; j < buckets[i+1]; j++) { + if (i < t-1 && !cmp(busdata[j], pivots[i])) { + std::cerr << "partition " << i << " has an element larger than the pivot" << std::endl; + std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; + std::cerr << "pivot " << i << " = " << binaryToString(pivots[i].barcode, 16) << std::endl; + + exit(1); + } + if (i > 0 && cmp(busdata[j], pivots[i-1])) { + std::cerr << "partition " << i << " has an element smaller than the next pivot" << std::endl; + std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; + std::cerr << "pivot " << i+1 << " = " << binaryToString(pivots[i+1].barcode, 16) << std::endl; + exit(1); + } + } + } + + // partition the busdata based on the middle pivot + /* + std::function mid_partition = [&](int i, int j) { + if (j-i <= 1) { + return; + } + size_t k = (j-i)/2; + BUSData p = pivots[k-1]; + buckets[j] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }); + mid_partition(i, k); + mid_partition(k, j); + }; + + mid_partition(0, t); + */ + end = clock(); + partition_time += ((double) (end - start)) / CLOCKS_PER_SEC; + std::cerr << "partition time: " << partition_time << "s" << std::endl; + + + + // sort each bucket + std::vector workers; + for (int i = 0; i < t; ++i) { + workers.push_back(std::thread([&busdata, &buckets, &cmp, i]() { + //std::cerr << "sorting bucket " << i << " with " << buckets[i] << " to " << buckets[i+1]<< std::endl; + std::sort(busdata + buckets[i], busdata + buckets[i+1], cmp); + })); + } + + for (auto &w : workers) { + w.join(); + } + + + } else { + std::sort(busdata, busdata + N, cmp); + } + +} + void bustools_sort(const Bustools_opt &opt) { - auto mem = opt.max_memory; - //There is a bug in Windows, where bustools sort fails. The problem is that - //gcount for some reason fails here if too much is read and returns 0, even though - //it succeeds. 
Could perhaps be a 32 bit issue somewhere, does size_t become 32 bits? - //Anyway, this is a workaround that fixes the issue - does the same as the flag -m 100000000. - //An interesting observation is that opt.max_memory is set to 1 << 32, which will become exactly - //zero if truncated to 32 bits... -#if defined(__MINGW32__) || defined(_MSC_VER) - const size_t win_mem_max = 1e8; - if (mem > win_mem_max) - { - mem = win_mem_max; - } -#endif + auto mem = WindowsMaxMemory(opt.max_memory); + BUSHeader h; size_t N = mem / sizeof(BUSData); BUSData *p = new BUSData[N]; @@ -381,49 +533,60 @@ void bustools_sort(const Bustools_opt &opt) exit(1); } -#if defined(__MINGW32__) || defined(_MSC_VER) - //Make sure to create the tmp directory if it doesn't exist - writing temporary files fails otherwise in Windows - //First get the directory - in theory, opt.temp_files can look like "tmp/x_" or just "x_" (or even nothing) - //so we should find the last slash and make sure that directory exists - std::size_t ind = opt.temp_files.rfind('/'); - std::size_t ind2 = opt.temp_files.rfind('\\'); - if (ind == std::string::npos) - { - ind = ind2; - } - else if (ind2 != std::string::npos) - { - //both valid, take the largest value (representing the last slash) - ind = std::max(ind, ind2); - } - if (ind != std::string::npos) - { - auto dirName = opt.temp_files.substr(0, ind); - //When our MinGW builds support c++17, change to std::filesystem - - //std::filesystem::path filepath = dirName; - //if (!std::filesystem::is_directory(filepath)) - //{ - // std::filesystem::create_directory(filepath); - //} - CreateDirectory(dirName.c_str(), NULL); //This will do nothing if the directory exists already - } -#endif + - size_t sc = 0; + size_t sc = 0; // number of records read + double sorting_time = 0; int tmp_file_no = 0; - for (const auto &infn : opt.files) - { + + // only use a single buffer if we are reading from stdin or if we have a single file + bool all_in_buffer = opt.stream_in || opt.files.size() == 1; + + + const auto collapse_and_write = [&](BUSData *p, size_t rc, std::ostream &outf) { + for (size_t i = 0; i < rc;) { + size_t j = i + 1; + uint32_t c = p[i].count; + auto ec = p[i].ec; + for (; j < rc; j++) { + if (p[i].barcode == p[j].barcode && p[i].UMI == p[j].UMI && p[i].ec == p[j].ec && p[i].flags == p[j].flags && p[i].pad == p[j].pad) { + c += p[j].count; + } else { + break; + } + } + // merge identical things + p[i].count = c; + outf.write((char *)(&(p[i])), sizeof(p[i])); + // increment + i = j; + } + }; + + // open the correct output stream + std::ofstream of; + std::streambuf *buf = nullptr; + if (!opt.stream_out) { + of.open(opt.output, std::ios::out | std::ios::binary); + buf = of.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream busf_out(buf); + + // measure time spent reading input + clock_t start,end; + double reading_time = 0; + double writing_time = 0; + + for (const auto &infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; - if (!opt.stream_in) - { + if (!opt.stream_in) { inf.open(infn.c_str(), std::ios::binary); inbuf = inf.rdbuf(); - } - else - { + } else { inbuf = std::cin.rdbuf(); } std::istream in(inbuf); @@ -432,163 +595,163 @@ void bustools_sort(const Bustools_opt &opt) int rc = 1; - while (in.good()) - { - // read as much as we can + + while (in.good()) { + + start = clock(); in.read((char *)p, N * sizeof(BUSData)); size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) - { + end = clock(); + reading_time += ((double) (end - start)) / CLOCKS_PER_SEC; + + // no 
records read, we are done + if (rc == 0) { break; } + + // records did not fit in buffer + if (rc >= N) { + all_in_buffer = false; + } + // now sort the data - std::sort(p, p + rc, cmp); - sc += rc; + start = clock(); + //std::sort(p, p + rc, cmp); + sort_bus_array(p, rc, opt.threads, cmp); + end = clock(); + sorting_time += ((double) (end - start)) / CLOCKS_PER_SEC; - // write the output - std::ofstream outf(opt.temp_files + std::to_string(tmp_file_no), std::ios::binary); - writeHeader(outf, h); + sc += rc; - for (size_t i = 0; i < rc;) - { - size_t j = i + 1; - uint32_t c = p[i].count; - auto ec = p[i].ec; - for (; j < rc; j++) - { - if (p[i].barcode == p[j].barcode && p[i].UMI == p[j].UMI && p[i].ec == p[j].ec && p[i].flags == p[j].flags && p[i].pad == p[j].pad) - { - c += p[j].count; - } - else - { - break; - } - } - // merge identical things - p[i].count = c; - outf.write((char *)(&(p[i])), sizeof(p[i])); - // increment - i = j; + if (all_in_buffer) { + std::cerr << " all fits in buffer" << std::endl; + // single file or stream, all data fits in buffer, write directly to output + start = clock(); + writeHeader(busf_out, h); + collapse_and_write(p, rc, busf_out); + end = clock(); + writing_time = ((double) (end - start)) / CLOCKS_PER_SEC; + } else { + // need to sort in chunks + // write the output + std::ofstream outf(opt.temp_files + std::to_string(tmp_file_no), std::ios::binary); + writeHeader(outf, h); + + collapse_and_write(p, rc, outf); + + outf.close(); + tmp_file_no++; } - - outf.close(); - tmp_file_no++; + } } delete[] p; p = nullptr; std::cerr << "Read in " << sc << " BUS records" << std::endl; + + std::cerr << "reading time " << reading_time << "s" << std::endl; + std::cerr << "sorting time " << sorting_time << "s" << std::endl; + std::cerr << "writing time " << writing_time << "s" << std::endl; - std::streambuf *buf = nullptr; - std::ofstream of; - if (!opt.stream_out) - { - of.open(opt.output, std::ios::out | std::ios::binary); - buf = of.rdbuf(); - } - else - { - buf = std::cout.rdbuf(); - } - std::ostream busf_out(buf); - writeHeader(busf_out, h); + + if (!all_in_buffer) { + writeHeader(busf_out, h); - // todo: skip writing to disk if it fits in memory - if (tmp_file_no == 1) - { - size_t M = N / 8; - p = new BUSData[M]; - std::ifstream in(opt.temp_files + "0", std::ios::binary); - BUSHeader tmp; - parseHeader(in, tmp); - while (in.good()) + if (tmp_file_no == 1) { - // read as much as we can - in.read((char *)p, M * sizeof(BUSData)); - size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) + size_t M = N / 8; + p = new BUSData[M]; + std::ifstream in(opt.temp_files + "0", std::ios::binary); + BUSHeader tmp; + parseHeader(in, tmp); + while (in.good()) { - break; + // read as much as we can + in.read((char *)p, M * sizeof(BUSData)); + size_t rc = in.gcount() / sizeof(BUSData); + if (rc == 0) + { + break; + } + busf_out.write((char *)p, rc * sizeof(BUSData)); } - busf_out.write((char *)p, rc * sizeof(BUSData)); + in.close(); + std::remove((opt.temp_files + "0").c_str()); } - in.close(); - std::remove((opt.temp_files + "0").c_str()); - } - else - { - // TODO: test if replacing with k-way merge is better - // adapted from https://github.com/arq5x/kway-mergesort/blob/master/kwaymergesort.h - int k = tmp_file_no; - size_t M = N / (k); - //std::memset(p, 0, N*sizeof(BUSData)); - std::vector bf(k); - for (int i = 0; i < k; i++) - { - bf[i].open((opt.temp_files + std::to_string(i)).c_str(), std::ios::binary); - BUSHeader tmp; - parseHeader(bf[i], tmp); - } - - 
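// ---------------------------------------------------------------------------
// Aside (not part of the patch): the block that follows merges the k sorted
// temporary files with a priority queue. The same k-way merge on in-memory
// sorted runs, as a standalone sketch (function and type names are
// illustrative; the real code reads BUSData from file streams and collapses
// duplicates as they come off the queue):
#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

std::vector<uint64_t> kway_merge(const std::vector<std::vector<uint64_t>> &runs) {
  using TP = std::pair<uint64_t, size_t>; // (value, index of the run it came from)
  auto gt = [](const TP &a, const TP &b) { return a.first > b.first; };
  std::priority_queue<TP, std::vector<TP>, decltype(gt)> pq(gt); // min-heap

  std::vector<size_t> pos(runs.size(), 0); // next unread position in each run
  for (size_t i = 0; i < runs.size(); ++i) {
    if (!runs[i].empty()) {
      pq.push({runs[i][0], i});
      pos[i] = 1;
    }
  }
  std::vector<uint64_t> out;
  while (!pq.empty()) {
    TP min = pq.top();
    pq.pop();
    out.push_back(min.first);      // emit the globally smallest element
    const size_t i = min.second;
    if (pos[i] < runs[i].size()) { // refill from the run we just consumed
      pq.push({runs[i][pos[i]], i});
      ++pos[i];
    }
  }
  return out;
}
// --- end of aside; the patch text continues below ---------------------------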
std::priority_queue, std::function> pq(ncmp); - BUSData t; - for (int i = 0; i < k; i++) - { - bf[i].read((char *)&t, sizeof(t)); - pq.push({t, i}); - } - - BUSData curr = pq.top().first; - curr.count = 0; // we'll count this again in the first loop - while (!pq.empty()) + else { - TP min = pq.top(); + // TODO: test if replacing with k-way merge is better + // adapted from https://github.com/arq5x/kway-mergesort/blob/master/kwaymergesort.h + int k = tmp_file_no; + size_t M = N / (k); + //std::memset(p, 0, N*sizeof(BUSData)); + std::vector bf(k); + for (int i = 0; i < k; i++) + { + bf[i].open((opt.temp_files + std::to_string(i)).c_str(), std::ios::binary); + BUSHeader tmp; + parseHeader(bf[i], tmp); + } - pq.pop(); - // process the data - BUSData &m = min.first; - int i = min.second; - if (m.barcode == curr.barcode && m.UMI == curr.UMI && m.ec == curr.ec && m.flags == curr.flags && m.pad == curr.pad) + std::priority_queue, std::function> pq(ncmp); + BUSData t; + for (int i = 0; i < k; i++) { - // same data, increase count - curr.count += m.count; + bf[i].read((char *)&t, sizeof(t)); + pq.push({t, i}); } - else + + BUSData curr = pq.top().first; + curr.count = 0; // we'll count this again in the first loop + while (!pq.empty()) { + TP min = pq.top(); - // new data let's output curr, new curr is m - if (curr.count != 0) + pq.pop(); + // process the data + BUSData &m = min.first; + int i = min.second; + if (m.barcode == curr.barcode && m.UMI == curr.UMI && m.ec == curr.ec && m.flags == curr.flags && m.pad == curr.pad) { - busf_out.write((char *)&curr, sizeof(curr)); + // same data, increase count + curr.count += m.count; } - curr = m; - } - // read next from stream - if (bf[i].good()) - { - bf[i].read((char *)&t, sizeof(t)); - if (bf[i].gcount() > 0) + else + { + + // new data let's output curr, new curr is m + if (curr.count != 0) + { + busf_out.write((char *)&curr, sizeof(curr)); + } + curr = m; + } + // read next from stream + if (bf[i].good()) { - pq.push({t, i}); + bf[i].read((char *)&t, sizeof(t)); + if (bf[i].gcount() > 0) + { + pq.push({t, i}); + } } } - } - if (curr.count > 0) - { - // write out remaining straggler - busf_out.write((char *)&curr, sizeof(curr)); - } + if (curr.count > 0) + { + // write out remaining straggler + busf_out.write((char *)&curr, sizeof(curr)); + } - // remove intermediary files - for (int i = 0; i < k; i++) - { - bf[i].close(); - std::remove((opt.temp_files + std::to_string(i)).c_str()); + // remove intermediary files + for (int i = 0; i < k; i++) + { + bf[i].close(); + std::remove((opt.temp_files + std::to_string(i)).c_str()); + } } } From b762a52d2504b7ed1496f19515de7578fa132d86 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Tue, 14 Mar 2023 22:14:46 +0000 Subject: [PATCH 26/49] better partition function --- src/bustools_sort.cpp | 47 +++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index 046f265..ae79e66 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -415,6 +415,7 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B double partition_time = 0; clock_t start, end; start = clock(); + /* for (int i = 0; i < t-1; i++) { BUSData p = pivots[i]; //std::cerr << "partitioning around " << binaryToString(p.barcode, 16) << std::endl; @@ -423,7 +424,28 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B //std::cerr << "bucket " << i << " has " << buckets[i+1] - buckets[i] << " 
elements, mid = " << mid << std::endl; } //std::cerr << "bucket " << t-1 << " has " << buckets[t] - buckets[t-1] << " elements" << std::endl; + */ + + // partition the busdata based on the middle pivot + std::function mid_partition = [&](int i, int j) { + if (j-i <= 1) { + return; + } + size_t k = i + (j-i)/2; + //std::cerr << "partitioning " << i << " to " << j << " with middle " << k << std::endl; + //std::cerr << "buckets i and j are " << buckets[i] << " and " << buckets[j] << std::endl; + BUSData p = pivots[k-1]; + //std::cerr << "pivot element is " << binaryToString(p.barcode, 16) << std::endl; + buckets[k] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }) - busdata; + //std::cerr << "bucket " << k << " is " << buckets[k] << std::endl; + mid_partition(i, k); + mid_partition(k, j); + }; + mid_partition(0, t); + + + /* // verify that the pivots are sorted for (int i = 0; i < t-2; i++) { if (!cmp(pivots[i], pivots[i+1])) { @@ -431,7 +453,11 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B exit(1); } } - + + for (int i = 0; i < t; i++) { + std::cerr << "bucket " << i << " at " << buckets[i] << " has " << buckets[i+1] - buckets[i] << " elements" << std::endl; + } + //verify that each partition is smaller than the pivot for (int i = 0; i < t; i++) { for (size_t j = buckets[i]; j < buckets[i+1]; j++) { @@ -445,27 +471,14 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B if (i > 0 && cmp(busdata[j], pivots[i-1])) { std::cerr << "partition " << i << " has an element smaller than the next pivot" << std::endl; std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; - std::cerr << "pivot " << i+1 << " = " << binaryToString(pivots[i+1].barcode, 16) << std::endl; + std::cerr << "pivot " << i-1 << " = " << binaryToString(pivots[i-1].barcode, 16) << std::endl; exit(1); } } } - - // partition the busdata based on the middle pivot - /* - std::function mid_partition = [&](int i, int j) { - if (j-i <= 1) { - return; - } - size_t k = (j-i)/2; - BUSData p = pivots[k-1]; - buckets[j] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }); - mid_partition(i, k); - mid_partition(k, j); - }; - - mid_partition(0, t); */ + + end = clock(); partition_time += ((double) (end - start)) / CLOCKS_PER_SEC; std::cerr << "partition time: " << partition_time << "s" << std::endl; From 679f84b0cf3509ddc9dd3bd5b6ce13846dbd99ec Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 15 Mar 2023 03:10:21 -0700 Subject: [PATCH 27/49] Add priority option --- src/bustools_main.cpp | 123 ++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 66 deletions(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 991c099..24c8814 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -335,6 +335,8 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) int cm_flag = 0; int hist_flag = 0; int rawcounts_flag = 0; + int priority_one = 0; + int priority_two = 0; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"genemap", required_argument, 0, 'g'}, @@ -349,6 +351,8 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, {"split", required_argument, 0, 's'}, + {"priority-1", no_argument, &priority_one, 1}, + 
{"priority-2", no_argument, &priority_two, 1}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -383,20 +387,16 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) break; } } - if (gene_flag) - { + if (gene_flag) { opt.count_collapse = true; } - if (umigene_flag) - { + if (umigene_flag) { opt.umi_gene_collapse = true; } - if (em_flag) - { + if (em_flag) { opt.count_em = true; } - if (cm_flag) - { + if (cm_flag) { opt.count_cm = true; } if (hist_flag) { @@ -405,6 +405,15 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) if (rawcounts_flag) { opt.count_raw_counts = true; } + if (priority_one) { + opt.count_mtx_priority = 1; + } + if (priority_two) { + opt.count_mtx_priority = 2; + } + if (priority_one && priority_two) { + opt.count_mtx_priority = -1; // Can't supply both, raise an error later + } while (optind < argc) opt.files.push_back(argv[optind++]); @@ -1636,95 +1645,75 @@ bool check_ProgramOptions_count(Bustools_opt &opt) bool ret = true; // check for output directory - if (opt.output.empty()) - { + if (opt.output.empty()) { std::cerr << "Error: Missing output directory" << std::endl; ret = false; } - else - { + else { bool isDir = false; - if (checkDirectoryExists(opt.output)) - { + if (checkDirectoryExists(opt.output)) { isDir = true; } - else - { - if (opt.output.at(opt.output.size() - 1) == '/') - { - if (my_mkdir(opt.output.c_str(), 0777) == -1) - { + else { + if (opt.output.at(opt.output.size() - 1) == '/') { + if (my_mkdir(opt.output.c_str(), 0777) == -1) { std::cerr << "Error: could not create directory " << opt.output << std::endl; ret = false; } - else - { + else { isDir = true; } } } - if (isDir) - { + if (isDir) { opt.output += "output"; } } - if (opt.count_em && opt.count_gene_multimapping) - { + if (opt.count_em && opt.count_gene_multimapping) { std::cerr << "Error: EM algorithm and counting multimapping reads are incompatible" << std::endl; ret = false; } - if (opt.count_em && opt.count_cm) - { + if (opt.count_em && opt.count_cm) { std::cerr << "Error: EM algorithm and counting multiplicites are incompatible" << std::endl; ret = false; } - if (opt.umi_gene_collapse && opt.count_cm) - { + if (opt.umi_gene_collapse && opt.count_cm) { std::cerr << "Error: Gene-level collapsing of UMIs and counting multiplicites are incompatible" << std::endl; ret = false; } - if (opt.umi_gene_collapse && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) - { + if (opt.umi_gene_collapse && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) { std::cerr << "Error: Gene-level collapsing of UMIs is currently incompatible with --hist, --downsample, or --rawcounts" << std::endl; ret = false; } - if (opt.count_cm && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) - { + if (opt.count_cm && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) { std::cerr << "Error: Counting multiplicites is incompatible with --hist, --downsample, or --rawcounts" << std::endl; ret = false; } - if (opt.count_raw_counts && opt.count_em) - { + if (opt.count_raw_counts && opt.count_em) { std::cerr << "Error: Counting raw counts are not supported for the EM algorithm" << std::endl; ret = false; } - if (opt.count_raw_counts && !opt.count_collapse) - { + if (opt.count_raw_counts && !opt.count_collapse) { std::cerr << "Error: Raw counts are currently only supported for gene counting, not ec counting." 
<< std::endl; ret = false; } - if (opt.files.size() == 0) - { + if (opt.files.size() == 0) { std::cerr << "Error: Missing BUS input files" << std::endl; ret = false; } - else - { - if (!opt.stream_in) - { - for (const auto &it : opt.files) - { - if (!checkFileExists(it)) - { + else { + if (!opt.stream_in) { + for (const auto &it : opt.files) { + if (!checkFileExists(it)) { std::cerr << "Error: File not found, " << it << std::endl; ret = false; } @@ -1732,13 +1721,11 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_genes.size() == 0) - { + if (opt.count_genes.size() == 0) { std::cerr << "Error: missing gene mapping file" << std::endl; ret = false; } - else - { + else { if (!checkFileExists(opt.count_genes)) { std::cerr << "Error: File not found " << opt.count_genes << std::endl; @@ -1746,13 +1733,11 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_ecs.size() == 0) - { + if (opt.count_ecs.size() == 0) { std::cerr << "Error: missing equivalence class mapping file" << std::endl; ret = false; } - else - { + else { if (!checkFileExists(opt.count_ecs)) { std::cerr << "Error: File not found " << opt.count_ecs << std::endl; @@ -1760,24 +1745,19 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_txp.size() == 0) - { + if (opt.count_txp.size() == 0) { std::cerr << "Error: missing transcript name file" << std::endl; ret = false; } - else - { - if (!checkFileExists(opt.count_txp)) - { + else { + if (!checkFileExists(opt.count_txp)) { std::cerr << "Error: File not found " << opt.count_txp << std::endl; ret = false; } } - if (opt.count_split.size() != 0) - { - if (!checkFileExists(opt.count_split)) - { + if (opt.count_split.size() != 0) { + if (!checkFileExists(opt.count_split)) { std::cerr << "Error: File not found " << opt.count_split << std::endl; ret = false; } @@ -1787,6 +1767,15 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } + if (opt.count_mtx_priority == -1) { + std::cerr << "Error: Cannot specify multiply options for priority " << std::endl; + ret = false; + } + if (opt.count_mtx_priority > 0 && opt.count_split.size() == 0) { + std::cerr << "Error: Cannot use priority unless -s is specified " << std::endl; + ret = false; + } + return ret; } @@ -2675,6 +2664,8 @@ void Bustools_count_Usage() << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl + << " --priority-1 For --split, prioritize first matrix in split matrix for UMIs that multimap to both splits" << std::endl + << " --priority-2 For --split, prioritize second matrix in split matrix for UMIs that multimap to both splits" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 2aea2ee757ba6b13e8d6732d0342c77b744ac367 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 15 Mar 2023 03:12:26 -0700 Subject: [PATCH 28/49] updated common for count_mtx_priority --- src/Common.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..89d8b2d 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -72,6 +72,7 @@ struct Bustools_opt std::string count_ecs; std::string count_txp; std::string count_split; + int 
count_mtx_priority = 0; bool count_em = false; bool count_cm = false; bool count_collapse = false; From 3f287d515332192146508008211f5e25dfc9d7f9 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Wed, 15 Mar 2023 12:16:23 +0000 Subject: [PATCH 29/49] batches writes --- src/bustools_sort.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index ae79e66..adc5e7e 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -558,6 +558,10 @@ void bustools_sort(const Bustools_opt &opt) const auto collapse_and_write = [&](BUSData *p, size_t rc, std::ostream &outf) { + size_t batch = 1<<20; + std::vector v; + v.reserve(batch); + for (size_t i = 0; i < rc;) { size_t j = i + 1; uint32_t c = p[i].count; @@ -571,10 +575,23 @@ void bustools_sort(const Bustools_opt &opt) } // merge identical things p[i].count = c; - outf.write((char *)(&(p[i])), sizeof(p[i])); + + // push back p to the vector + v.push_back(p[i]); + + if (v.size() >= batch) { + outf.write((char *)v.data(), v.size() * sizeof(BUSData)); + v.clear(); + } + + //outf.write((char *)(&(p[i])), sizeof(p[i])); // increment i = j; } + if (v.size() > 0) { + outf.write((char *)v.data(), v.size() * sizeof(BUSData)); + v.clear(); + } }; // open the correct output stream From a7af47ad77c29e0d814bbe4129ee417c4f749e3f Mon Sep 17 00:00:00 2001 From: Yenaled Date: Fri, 17 Mar 2023 19:24:18 +0000 Subject: [PATCH 30/49] Priority rules for mtx types --- src/Common.cpp | 54 ++++++++++++++++++++++++++++++++++++++---- src/Common.hpp | 1 - src/bustools_count.cpp | 43 +++++++++++++++++++++++++++------ src/bustools_main.cpp | 24 ------------------- 4 files changed, 86 insertions(+), 36 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 6e0d89f..58c169c 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -335,11 +335,57 @@ COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split) { + // Note: tx_split indices are tx ids and values are 1 (exists in split) or 0 (does not exist in split) if (tx_split.size() == 0) return COUNT_DEFAULT; if (ecs.size() == 0) return COUNT_AMBIGUOUS; // Shouldn't happen + std::vector u; + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } + for (size_t i = 1; i < ecs.size(); i++) { + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } + } size_t n_1 = 0; size_t n_2 = 0; - for (auto ec : ecs) { // We still need to optimize this + for (auto t : u) { + if(tx_split[t]) { + n_2++; + } else { + n_1++; + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? 
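// ---------------------------------------------------------------------------
// Aside (not part of the patch): the loop above keeps u as a running
// intersection of sorted transcript lists, using the classic two-pointer scan
// instead of repeated std::find. The same idea on two sorted vectors, as a
// standalone sketch (std::set_intersection is the library equivalent):
#include <cstdint>
#include <vector>

std::vector<int32_t> intersect_sorted(const std::vector<int32_t> &u,
                                      const std::vector<int32_t> &v) {
  std::vector<int32_t> out;
  size_t j = 0, l = 0;
  while (j < u.size() && l < v.size()) {
    if (u[j] < v[l]) {
      ++j;                 // u[j] cannot be in v, advance u
    } else if (u[j] > v[l]) {
      ++l;                 // v[l] cannot be in u, advance v
    } else {
      out.push_back(u[j]); // present in both
      ++j;
      ++l;
    }
  }
  return out;              // runs in O(|u| + |v|)
}
// --- end of aside; the patch text continues below ---------------------------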
COUNT_DEFAULT : COUNT_SPLIT)); + /*for (auto ec : ecs) { // We still need to optimize this for (auto t: ecmap[ec]) { if(std::find(tx_split.begin(), tx_split.end(), t) != tx_split.end()) { n_2++; @@ -349,8 +395,8 @@ COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, co if (n_1 > 0 && n_2 > 0) break; // Stop searching } if (n_1 > 0 && n_2 > 0) break; // Stop searching - } - return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); + }*/ + //return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); } @@ -359,4 +405,4 @@ void copy_file(std::string src, std::string dest) { std::ofstream idest(dest, std::ios::binary); idest << isrc.rdbuf(); -} \ No newline at end of file +} diff --git a/src/Common.hpp b/src/Common.hpp index 89d8b2d..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -72,7 +72,6 @@ struct Bustools_opt std::string count_ecs; std::string count_txp; std::string count_split; - int count_mtx_priority = 0; bool count_em = false; bool count_cm = false; bool count_collapse = false; diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d125f..f155669 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -23,7 +23,9 @@ void bustools_count(Bustools_opt &opt) { u_map_ txnames; auto txnames_split = txnames; // copy - std::vector tx_split; + std::vector tx_split; // Store transcript names for split + std::vector tx_split_lookup; // Map transcript IDs to mtx status + int count_mtx_priority = !opt.count_gene_multimapping ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -34,7 +36,7 @@ void bustools_count(Bustools_opt &opt) { for (int32_t ec = 0; ec < ecmap.size(); ec++) { ecmapinv.insert({ecmap[ec], ec}); } - std::vector> ec2genes; + std::vector> ec2genes, ec2genes_priority; create_ec2genes(ecmap, genemap, ec2genes); @@ -64,6 +66,7 @@ void bustools_count(Bustools_opt &opt) { if (count_split) { parseTranscripts(opt.count_split, txnames_split); // subset of txnames tx_split.reserve(txnames_split.size()); + tx_split_lookup.resize(txnames.size(), -1); for (auto x : txnames_split) { if (txnames.count(x.first)) tx_split.push_back(txnames[x.first]); } @@ -71,6 +74,25 @@ void bustools_count(Bustools_opt &opt) { of_A.open(mtx_ofn_split_A); of_2 << ssHeader.str(); of_A << ssHeader.str(); + auto ecmap_ = ecmap; // copy + for (int ec = 0; ec < ecmap.size(); ec++) { // Get new ecmap based on split + for (auto tx : ecmap[ec]) { + auto &new_ec = ecmap_[ec]; + bool found = std::find(tx_split.begin(), tx_split.end(), tx) != tx_split.end(); + tx_split_lookup[tx] = found; + // Remove transcripts depending on whether they're found in tx_split + // Note: It is possible for one of the new ECs to be empty, in which case intersect_genes_of_ecs will result in the empty set for glist + // Essentially, we are removing all tx's that belong to (or not belong to) tx_split in the equivalence classes + // This handles instances in which a read maps to exon of one gene but intron of another (likely overlapping) gene to avoid discarding the record + // This is done at read-level (not UMI-level) so if one UMI maps to one gene's exon but another UMI maps to the other gene's intron, we still discard it + if (count_mtx_priority == 1 && !found) + 
new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end()); + else if (count_mtx_priority == 2 && found) + new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end()); + } + } + if (count_mtx_priority != 0) + create_ec2genes(ecmap_, genemap, ec2genes_priority); // Note: Some ECs may not be associated with any genes (i.e. empty vector) } of.open(mtx_ofn); of << ssHeader.str(); @@ -151,6 +173,7 @@ void bustools_count(Bustools_opt &opt) { if (opt.umi_gene_collapse) { intersect_genes_of_ecs(ecs,ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); } if (opt.umi_gene_collapse && glist.size() == 0) { // Gene-intersection zero, check for UMI collision @@ -160,6 +183,7 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -233,7 +257,7 @@ void bustools_count(Bustools_opt &opt) { } } double val = j-i; - auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split_lookup); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); of_ << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; @@ -275,6 +299,7 @@ void bustools_count(Bustools_opt &opt) { } intersect_genes_of_ecs(ecs,ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (opt.count_downsampling_factor != 1.0) { uint32_t newCounts = 0; @@ -289,7 +314,7 @@ void bustools_count(Bustools_opt &opt) { } } if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {(opt.count_raw_counts ? 
counts : 1.0)/gn, which_mtx}}); @@ -342,6 +367,7 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -351,10 +377,11 @@ void bustools_count(Bustools_opt &opt) { } if (glist.size() == 0) { intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); } gn = glist.size(); if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {1.0/gn, which_mtx}}); @@ -375,9 +402,10 @@ void bustools_count(Bustools_opt &opt) { ecs.push_back(v[i].ec); intersect_genes_of_ecs(ecs, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {v[i].count/gn, which_mtx}}); @@ -532,6 +560,7 @@ void bustools_count(Bustools_opt &opt) { ecs.resize(0); ecs.push_back(ec); intersect_genes_of_ecs(ecs, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (gn != 1) { continue; @@ -550,7 +579,7 @@ void bustools_count(Bustools_opt &opt) { } val += column_vp[j].second.first; } - auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split_lookup); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? 
n_entries_2 : n_entries_A); of_ << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 24c8814..d19d01e 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -335,8 +335,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) int cm_flag = 0; int hist_flag = 0; int rawcounts_flag = 0; - int priority_one = 0; - int priority_two = 0; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"genemap", required_argument, 0, 'g'}, @@ -351,8 +349,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, {"split", required_argument, 0, 's'}, - {"priority-1", no_argument, &priority_one, 1}, - {"priority-2", no_argument, &priority_two, 1}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -405,15 +401,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) if (rawcounts_flag) { opt.count_raw_counts = true; } - if (priority_one) { - opt.count_mtx_priority = 1; - } - if (priority_two) { - opt.count_mtx_priority = 2; - } - if (priority_one && priority_two) { - opt.count_mtx_priority = -1; // Can't supply both, raise an error later - } while (optind < argc) opt.files.push_back(argv[optind++]); @@ -1767,15 +1754,6 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_mtx_priority == -1) { - std::cerr << "Error: Cannot specify multiply options for priority " << std::endl; - ret = false; - } - if (opt.count_mtx_priority > 0 && opt.count_split.size() == 0) { - std::cerr << "Error: Cannot use priority unless -s is specified " << std::endl; - ret = false; - } - return ret; } @@ -2664,8 +2642,6 @@ void Bustools_count_Usage() << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl - << " --priority-1 For --split, prioritize first matrix in split matrix for UMIs that multimap to both splits" << std::endl - << " --priority-2 For --split, prioritize second matrix in split matrix for UMIs that multimap to both splits" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 7ef8da389cfdb8f86dd5be01ee8d310e09d53bbf Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 08:14:31 -0700 Subject: [PATCH 31/49] update count to output barcode prefix --- src/bustools_count.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index f155669..8852722 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -48,6 +48,7 @@ void bustools_count(Bustools_opt &opt) { std::string mtx_ofn_split_2 = opt.output + ".2.mtx"; std::string mtx_ofn_split_A = opt.output + ".ambiguous.mtx"; std::string barcodes_ofn = opt.output + ".barcodes.txt"; + std::string barcodes_prefix_ofn = opt.output + ".barcodes.prefix.txt"; std::string ec_ofn = opt.output + ".ec.txt"; std::string gene_ofn = opt.output + ".genes.txt"; std::string hist_ofn = opt.output + ".hist.txt"; @@ -691,11 +692,22 @@ void bustools_count(Bustools_opt &opt) { writeGenes(gene_ofn, genenames); } // write barcode 
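// ---------------------------------------------------------------------------
// Aside (not part of the patch): BUS barcodes are packed two bits per base
// into a uint64_t, and the prefix output added below keeps the low 2*bclen
// bits as the cell barcode while treating any higher bits as an extra prefix
// component. A sketch of that packing and masking with illustrative helper
// names (the real code uses bustools' stringToBinary/binaryToString):
#include <cstdint>
#include <string>

// Pack an ACGT string into 2 bits per base (A=0, C=1, G=2, T=3); assumes
// the string is at most 32 bases.
uint64_t pack_2bit(const std::string &s) {
  uint64_t r = 0;
  for (char c : s) {
    r <<= 2;
    r |= (c == 'A') ? 0ULL : (c == 'C') ? 1ULL : (c == 'G') ? 2ULL : 3ULL;
  }
  return r;
}

// Unpack the low `len` bases back into a string.
std::string unpack_2bit(uint64_t b, uint32_t len) {
  std::string s(len, 'N');
  for (uint32_t i = 0; i < len; ++i) {
    s[len - 1 - i] = "ACGT"[b & 3ULL];
    b >>= 2;
  }
  return s;
}

// Split a packed value into (prefix, barcode) for a barcode of length bclen;
// assumes bclen < 32 so the shift below is well defined.
void split_prefix(uint64_t packed, uint32_t bclen, uint64_t &prefix,
                  uint64_t &barcode) {
  const uint64_t len_mask = (1ULL << (2 * bclen)) - 1; // low 2*bclen bits
  barcode = packed & len_mask;
  prefix = packed >> (2 * bclen);
}
// --- end of aside; the patch text continues below ---------------------------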
file + bool write_prefix = false; std::ofstream bcof; bcof.open(barcodes_ofn); + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); for (const auto &x : barcodes) { + if (x != (x & len_mask)) write_prefix = true; bcof << binaryToString(x, bclen) << "\n"; } + if (write_prefix) { + std::ofstream bcprefixof; + bcprefixof.open(barcodes_prefix_ofn); + for (const auto &x : barcodes) { + bcprefixof << binaryToString(x >> (2*bclen), 32-bclen) << "\n"; + } + bcprefixof.close(); + } bcof.close(); //write histogram file From 5dc2bcf781b751a0d5cf9f51923427802d8cceb3 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 08:15:54 -0700 Subject: [PATCH 32/49] cleanup len_mask in bustools correct --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index bfeec77..c854e35 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); From 44d724c95e9e563639e7ead0a87ea0473d268473 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 09:16:12 -0700 Subject: [PATCH 33/49] fix count_mtx_priority --- src/bustools_count.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 8852722..f3d1a19 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -25,7 +25,8 @@ void bustools_count(Bustools_opt &opt) { auto txnames_split = txnames; // copy std::vector tx_split; // Store transcript names for split std::vector tx_split_lookup; // Map transcript IDs to mtx status - int count_mtx_priority = !opt.count_gene_multimapping ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] + bool count_split = !opt.count_split.empty(); + int count_mtx_priority = !opt.count_gene_multimapping && count_split ? 
1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -40,7 +41,6 @@ void bustools_count(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); - bool count_split = !opt.count_split.empty(); std::ofstream of; std::ofstream of_2; std::ofstream of_A; From f5e9de4daa2e70c83b97d3808f89720c40905237 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 27 Mar 2023 01:48:30 -0700 Subject: [PATCH 34/49] fix count_mtx_priority w/ UMI collision logic --- src/bustools_count.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index f3d1a19..6289545 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -184,7 +184,6 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); - if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -368,7 +367,6 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); - if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { From c31d2f11c5bf7a847310f0759f803d9fbee9d480 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 21:29:04 -0700 Subject: [PATCH 35/49] Try multicomponent barcodes --- src/bustools_correct.cpp | 254 +++++++++++++++++++++++---------------- 1 file changed, 151 insertions(+), 103 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index c854e35..dc55e42 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -416,10 +416,9 @@ void bustools_split_correct(Bustools_opt &opt) p = nullptr; } -void bustools_correct(Bustools_opt &opt) -{ +void bustools_correct(Bustools_opt &opt) { uint32_t bclen = 0; - uint32_t wc_bclen = 0; + std::vector wc_bclen; uint32_t umilen = 0; BUSHeader h; size_t nr = 0; @@ -435,57 +434,94 @@ void bustools_correct(Bustools_opt &opt) bool dump_bool = opt.dump_bool; std::ofstream of; - if (dump_bool) - { + if (dump_bool) { of.open(opt.dump); } std::ifstream wf(opt.whitelist, std::ios::in); std::string line; line.reserve(100); - std::unordered_set wbc; - wbc.reserve(100000); + std::vector > wbc; // Each set contains whitelist uint32_t f = 0; - while (std::getline(wf, line)) - { - if (wc_bclen == 0) - { - wc_bclen = line.size(); + bool first_line = true; + while (std::getline(wf, line)) { + std::stringstream ss(line); + std::string barcode; + int i = 0; + while (ss >> barcode) { + std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); + uint64_t bc = stringToBinary(barcode, f); + if (first_line) { + std::unordered_set bc_set; + bc_set.insert(bc); + wbc.push_back(bc_set); + wbc[i].reserve(100000); + wc_bclen.push_back(barcode.size()); + } else if (i >= wbc.size()) { // Too many barcodes in this line + std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + << " barcodes on a line while " << 
wbc.size() << " barcodes on another line" + << std::endl; + exit(1); + } else if (barcode.length() != wc_bclen[i]) { + std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] + << " on a line while " << wbc[i].length() << " barcodes on another line" + << std::endl; + exit(1); + } else { + wbc[i].insert(bc); + } + i++; } - uint64_t bc = stringToBinary(line, f); - wbc.insert(bc); + if (i != wbc[i].size()) { // Incorrect number of barcodes on this line + std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + << " barcodes on a line while " << wbc.size() << " barcodes on another line" + << std::endl; + exit(1); + } + first_line = false; } wf.close(); - std::cerr << "Found " << wbc.size() << " barcodes in the whitelist" << std::endl; - - // split barcode into upper and lower half - size_t bc2 = (wc_bclen + 1) / 2; - - std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes - - uint64_t mask_size = (1ULL << (2 * bc2)); - uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; - uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; - for (uint64_t b : wbc) - { - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; - - correct[ub].second.add(lb); - correct[lb].first.add(ub); + if (wbc.size() == 0) { + std::cerr << "Error: whitelist file malformed; no barcodes found" < 1) { + std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; + } + + // split barcode into upper and lower half (across all barcodes in a barcode set) + std::vector>> correct_vec; // size of vector = how many barcode sets there are + std::vector > lower_upper_mask_vec; // size of vector = how many barcode sets there are + std::vector bc2_vec; // size of vector = how many barcode sets there are + for (int i = 0; i < wc_bclen.size() : i++) { + auto bclen2 = wc_bclen[i]; // i = index of current barcode set + size_t bc2 = (bclen2 + 1) / 2; + std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes + uint64_t mask_size = (1ULL << (2 * bc2)); + uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; + uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; + for (uint64_t b : wbc[i]) { // Iterate through barcodes of current barcode set + uint64_t lb = b & lower_mask; + uint64_t ub = (b >> (2 * bc2)) & upper_mask; + correct[ub].second.add(lb); + correct[lb].first.add(ub); + } + correct_vec.push_back(std::move(correct)); + lower_upper_mask_vec.push_back(std::make_pair(lower_mask, upper_mask)); + bc2_vec.push_back(bc2); } std::streambuf *buf = nullptr; std::ofstream busf_out; - if (!opt.stream_out) - { + if (!opt.stream_out) { busf_out.open(opt.output, std::ios::out | std::ios::binary); buf = busf_out.rdbuf(); } - else - { + else { buf = std::cout.rdbuf(); } std::ostream bus_out(buf); @@ -494,42 +530,39 @@ void bustools_correct(Bustools_opt &opt) nr = 0; BUSData bd; - for (const auto &infn : opt.files) - { + for (const auto &infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; - if (!opt.stream_in) - { + if (!opt.stream_in) { inf.open(infn.c_str(), std::ios::binary); inbuf = inf.rdbuf(); - } - else - { + } else { inbuf = std::cin.rdbuf(); } std::istream in(inbuf); parseHeader(in, h); - if (!outheader_written) - { + if (!outheader_written) { writeHeader(bus_out, h); outheader_written = true; } - if (bclen == 0) - { + if (bclen == 0) { bclen = h.bclen; + size_t final_wc_bclen = 0; + + for (auto l : wc_bclen) { + final_wc_bclen += l; + } - if (bclen != wc_bclen) - { + if (bclen != final_wc_bclen) { std::cerr << "Error: barcode 
length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << wc_bclen << std::endl << " check that your whitelist matches the technology used" << std::endl; exit(1); } } - if (umilen == 0) - { + if (umilen == 0) { umilen = h.umilen; } @@ -539,67 +572,82 @@ void bustools_correct(Bustools_opt &opt) { in.read((char *)p, N * sizeof(BUSData)); size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) - { + if (rc == 0) { break; } nr += rc; - for (size_t i = 0; i < rc; i++) - { + for (size_t i = 0; i < rc; i++) { bd = p[i]; - auto it = wbc.find(bd.barcode & len_mask); - if (it != wbc.end()) - { - stat_white++; - bus_out.write((char *)&bd, sizeof(bd)); - } - else - { - uint64_t b = bd.barcode & len_mask; - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; - uint64_t lbc = 0, ubc = 0; - int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); - int correct_upper = search_for_mismatch(correct[lb].first, wc_bclen - bc2, ub, ubc); - int nc = correct_lower + correct_upper; - if (nc != 1) - { - stat_uncorr++; + uint64_t b = bd.barcode & len_mask; + uint64_t running_len = 0; + size_t stat_white_ = 0; + size_t stat_uncorr_ = 0; + size_t stat_corr_ = 0; + uint64_t correction = 0; + std::vector > correction; // TODO: pair: first = corrected barcode; second = length + correction.resize(wbc.size()); + for (int j = wbc.size()-1; j >= 0; j--) { // Iterate through all the barcode sets + auto bclen2 = wc_bclen[j]; + running_len += bclen2; + uint64_t shift_len = 2*(running_len-bclen2); // used for masking out the least significant bits up to the current barcode + uint64_t len_mask2 = 0; // This mask = Only consider these bits (based on each barcode set) + len_mask2 = ((1ULL << (2*running_len)) - 1); + if (shift_len != 0) { + len_mask2 &= (~((1ULL << (shift_len)) - 1)); } - else if (nc == 1) - { - if (correct_lower == 1) - { - uint64_t b_corrected = (ub << (2 * bc2)) | lbc; - if (dump_bool) - { - if ((bd.barcode & len_mask) != old_barcode) - { - of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode & len_mask; - } + len_mask2 &= ((1ULL << (2*running_len)) - 1); + len_mask2 &= len_mask; // not necessary + b &= len_mask2; + auto it = wbc[j].find(b); + if (it != wbc[j].end()) { // Barcode is in the whitelist + stat_white_++; + correction |= (b & len_mask2); + } else { + auto lower_mask = lower_upper_mask_vec[j].first; + auto upper_mask = lower_upper_mask_vec[j].second; + auto bc2 = bc2_vec[j]; + auto& correct = correct_vec[j]; + uint64_t lb = b & lower_mask; + uint64_t ub = (b >> (2 * bc2)) & upper_mask; + uint64_t lbc = 0, ubc = 0; + int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); + int correct_upper = search_for_mismatch(correct[lb].first, bclen2 - bc2, ub, ubc); + int nc = correct_lower + correct_upper; + if (nc != 1) { // Uncorrected + stat_uncorr_++; + break; + } else if (nc == 1) { + stat_corr_++; + if (correct_lower == 1) { + uint64_t b_corrected = (ub << (2 * bc2)) | lbc; + b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected &= len_mask2; + correction |= b_corrected; // Add onto existing correction + } else if (correct_upper == 1) { + uint64_t b_corrected = (ubc << (2 * bc2)) | lb; + b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected &= len_mask2; + correction |= b_corrected; // Add onto existing correction } - - 
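// ---------------------------------------------------------------------------
// Aside (not part of the patch): the correction loop above relies on the
// half-split trick: if a barcode has exactly one mismatched base, then one of
// its two halves still matches an on-list entry exactly, so it is enough to
// look up candidates that share either half. A simplified, hash-map based
// sketch of that idea on 2-bit packed barcodes; the real code uses the
// compact correct[] tables and search_for_mismatch instead, and all names
// here are illustrative. Assumes barcode length <= 32.
#include <cstdint>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Number of mismatched bases between two packed barcodes of length len.
int base_mismatches(uint64_t a, uint64_t b, uint32_t len) {
  int d = 0;
  const uint64_t x = a ^ b;
  for (uint32_t i = 0; i < len; ++i) {
    if ((x >> (2 * i)) & 3ULL) ++d;
  }
  return d;
}

// Index the on-list barcodes by their lower `half` bases and upper bases.
void build_half_index(const std::unordered_set<uint64_t> &onlist, uint32_t half,
                      std::unordered_map<uint64_t, std::vector<uint64_t>> &by_lower,
                      std::unordered_map<uint64_t, std::vector<uint64_t>> &by_upper) {
  const uint64_t lower_mask = (1ULL << (2 * half)) - 1;
  for (uint64_t w : onlist) {
    by_lower[w & lower_mask].push_back(w);
    by_upper[w >> (2 * half)].push_back(w);
  }
}

// Correct q against the on-list allowing at most one mismatched base.
// Returns true and sets `corrected` only when the correction is unique.
bool correct_one_mismatch(uint64_t q, uint32_t len, uint32_t half,
                          const std::unordered_set<uint64_t> &onlist,
                          const std::unordered_map<uint64_t, std::vector<uint64_t>> &by_lower,
                          const std::unordered_map<uint64_t, std::vector<uint64_t>> &by_upper,
                          uint64_t &corrected) {
  if (onlist.count(q)) { corrected = q; return true; } // already on the list
  const uint64_t lower_mask = (1ULL << (2 * half)) - 1;
  std::vector<uint64_t> candidates;
  auto it = by_lower.find(q & lower_mask); // exact lower half, mismatch in upper
  if (it != by_lower.end()) candidates.insert(candidates.end(), it->second.begin(), it->second.end());
  it = by_upper.find(q >> (2 * half));     // exact upper half, mismatch in lower
  if (it != by_upper.end()) candidates.insert(candidates.end(), it->second.begin(), it->second.end());
  int hits = 0;
  for (uint64_t w : candidates) {
    if (base_mismatches(q, w, len) == 1) { corrected = w; ++hits; }
  }
  return hits == 1; // accept only a unique one-mismatch neighbour
}
// --- end of aside; the patch text continues below ---------------------------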
bd.barcode = b_corrected | (bd.barcode & ~len_mask); - bus_out.write((char *)&bd, sizeof(bd)); - stat_corr++; } - else if (correct_upper == 1) - { - uint64_t b_corrected = (ubc << (2 * bc2)) | lb; - if (dump_bool) - { - if ((bd.barcode & len_mask) != old_barcode) - { - of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode & len_mask; - } - } - - bd.barcode = b_corrected | (bd.barcode & ~len_mask); - bus_out.write((char *)&bd, sizeof(bd)); - stat_corr++; + } + } + if (stat_white_ == wbc.size()) { + stat_white++; + bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is + } + if (stat_uncorr_ == wbc.size()) { + stat_uncorr++; // Uncorrected; do not write BUS record + } + if (stat_corr_ > 0) { + stat_corr++; // Corrected; and write it out + bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length + bus_out.write((char *)&bd, sizeof(bd)); + if (dump_bool) { + if (b != old_barcode) { + of << binaryToString(b, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = b & len_mask; } } } From 49d69adb60d626f3286e2130ecf0dabd10702953 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 23:08:04 -0700 Subject: [PATCH 36/49] fixed stuff with multicomponent barcodes --- src/bustools_correct.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index dc55e42..ce9bb12 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -464,7 +464,7 @@ void bustools_correct(Bustools_opt &opt) { exit(1); } else if (barcode.length() != wc_bclen[i]) { std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] - << " on a line while " << wbc[i].length() << " barcodes on another line" + << " on a line but barcode length " << wc_bclen[i].length() << " on another line" << std::endl; exit(1); } else { @@ -496,7 +496,7 @@ void bustools_correct(Bustools_opt &opt) { std::vector>> correct_vec; // size of vector = how many barcode sets there are std::vector > lower_upper_mask_vec; // size of vector = how many barcode sets there are std::vector bc2_vec; // size of vector = how many barcode sets there are - for (int i = 0; i < wc_bclen.size() : i++) { + for (int i = 0; i < wc_bclen.size(); i++) { auto bclen2 = wc_bclen[i]; // i = index of current barcode set size_t bc2 = (bclen2 + 1) / 2; std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes @@ -556,7 +556,7 @@ void bustools_correct(Bustools_opt &opt) { } if (bclen != final_wc_bclen) { - std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << wc_bclen << std::endl + std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << final_wc_bclen << std::endl << " check that your whitelist matches the technology used" << std::endl; exit(1); @@ -585,8 +585,6 @@ void bustools_correct(Bustools_opt &opt) { size_t stat_uncorr_ = 0; size_t stat_corr_ = 0; uint64_t correction = 0; - std::vector > correction; // TODO: pair: first = corrected barcode; second = length - correction.resize(wbc.size()); for (int j = wbc.size()-1; j >= 0; j--) { // Iterate through all the barcode sets auto bclen2 = wc_bclen[j]; running_len += bclen2; @@ -646,7 +644,7 @@ void bustools_correct(Bustools_opt &opt) { bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) 
{ if (b != old_barcode) { - of << binaryToString(b, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + of << binaryToString(b, bclen) << "\t" << binaryToString(correction, bclen) << "\n"; old_barcode = b & len_mask; } } From 8274a6fd1120a1b00a6c61cab0cf69879bfa9275 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 23:27:39 -0700 Subject: [PATCH 37/49] more fixes --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index ce9bb12..24a5526 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -464,7 +464,7 @@ void bustools_correct(Bustools_opt &opt) { exit(1); } else if (barcode.length() != wc_bclen[i]) { std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] - << " on a line but barcode length " << wc_bclen[i].length() << " on another line" + << " on a line but barcode length " << barcode.length() << " on another line" << std::endl; exit(1); } else { @@ -502,7 +502,7 @@ void bustools_correct(Bustools_opt &opt) { std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes uint64_t mask_size = (1ULL << (2 * bc2)); uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; - uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; + uint64_t upper_mask = (1ULL << (2 * (bclen2 - bc2))) - 1; for (uint64_t b : wbc[i]) { // Iterate through barcodes of current barcode set uint64_t lb = b & lower_mask; uint64_t ub = (b >> (2 * bc2)) & upper_mask; From 8e70f28b0f7c57bb1974d114dbe36abf9a34929c Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 00:10:56 -0700 Subject: [PATCH 38/49] another fix --- src/bustools_correct.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 24a5526..80a06bd 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -472,8 +472,9 @@ void bustools_correct(Bustools_opt &opt) { } i++; } - if (i != wbc[i].size()) { // Incorrect number of barcodes on this line - std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + if (i == 0) continue; // empty line + if (i != wbc.size()) { // Incorrect number of barcodes on this line + std::cerr << "Error: whitelist file malformed; encountered " << i << " barcodes on a line while " << wbc.size() << " barcodes on another line" << std::endl; exit(1); From 6a18f286d3a238925b827d5f892bd79855fbc104 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 02:53:21 -0700 Subject: [PATCH 39/49] some final fixes (hopefully) --- src/bustools_correct.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 80a06bd..84d70cd 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -597,18 +597,19 @@ void bustools_correct(Bustools_opt &opt) { } len_mask2 &= ((1ULL << (2*running_len)) - 1); len_mask2 &= len_mask; // not necessary - b &= len_mask2; - auto it = wbc[j].find(b); + uint64_t b_ = b & len_mask2; // The barcode alone in the location that it appears in + uint64_t b_shifted = b_ >> shift_len; // The barcode shifted into the least significant bits location + auto it = wbc[j].find(b_shifted); if (it != wbc[j].end()) { // Barcode is in the whitelist stat_white_++; - correction |= (b & len_mask2); + correction |= b_; } else { auto lower_mask = lower_upper_mask_vec[j].first; auto upper_mask = 
lower_upper_mask_vec[j].second; auto bc2 = bc2_vec[j]; auto& correct = correct_vec[j]; - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; + uint64_t lb = b_shifted & lower_mask; + uint64_t ub = (b_shifted >> (2 * bc2)) & upper_mask; uint64_t lbc = 0, ubc = 0; int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); int correct_upper = search_for_mismatch(correct[lb].first, bclen2 - bc2, ub, ubc); @@ -620,33 +621,33 @@ void bustools_correct(Bustools_opt &opt) { stat_corr_++; if (correct_lower == 1) { uint64_t b_corrected = (ub << (2 * bc2)) | lbc; - b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected = b_corrected << shift_len; // We have the corrected barcode in the correct location b_corrected &= len_mask2; correction |= b_corrected; // Add onto existing correction } else if (correct_upper == 1) { uint64_t b_corrected = (ubc << (2 * bc2)) | lb; - b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected = b_corrected << shift_len; // We have the corrected barcode in the correct location b_corrected &= len_mask2; correction |= b_corrected; // Add onto existing correction } } } } - if (stat_white_ == wbc.size()) { + if (stat_uncorr_ == 0 && stat_white_ == wbc.size()) { stat_white++; bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is } if (stat_uncorr_ == wbc.size()) { stat_uncorr++; // Uncorrected; do not write BUS record } - if (stat_corr_ > 0) { + if (stat_uncorr_ == 0 && stat_corr_ > 0) { stat_corr++; // Corrected; and write it out bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) { if (b != old_barcode) { of << binaryToString(b, bclen) << "\t" << binaryToString(correction, bclen) << "\n"; - old_barcode = b & len_mask; + old_barcode = b; } } } @@ -659,13 +660,11 @@ void bustools_correct(Bustools_opt &opt) { << "Corrected = " << stat_corr << std::endl << "Uncorrected = " << stat_uncorr << std::endl; - if (!opt.stream_out) - { + if (!opt.stream_out) { busf_out.close(); } - if (opt.dump_bool) - { + if (opt.dump_bool) { of.close(); // if of is open } From 5d09ac19511dfcebf0d483ac4953e117e6d5ecc4 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 03:09:44 -0700 Subject: [PATCH 40/49] cleanup --- src/bustools_correct.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 84d70cd..4c4b9e5 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -633,14 +633,12 @@ void bustools_correct(Bustools_opt &opt) { } } } - if (stat_uncorr_ == 0 && stat_white_ == wbc.size()) { + if (stat_uncorr_ > 0) { + stat_uncorr++; // Uncorrected; do not write BUS record + } else if (stat_white_ == wbc.size()) { stat_white++; bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is - } - if (stat_uncorr_ == wbc.size()) { - stat_uncorr++; // Uncorrected; do not write BUS record - } - if (stat_uncorr_ == 0 && stat_corr_ > 0) { + } else if (stat_corr_ > 0) { stat_corr++; // Corrected; and write it out bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length bus_out.write((char *)&bd, sizeof(bd)); From 88ffe8d351b98a7be5fff3cc0fd952e8caca4529 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan 
Date: Tue, 11 Apr 2023 04:21:58 -0700 Subject: [PATCH 41/49] make multipart barcodes more lax/flexible --- src/bustools_correct.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 4c4b9e5..22e5054 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -451,15 +451,18 @@ void bustools_correct(Bustools_opt &opt) { while (ss >> barcode) { std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); uint64_t bc = stringToBinary(barcode, f); - if (first_line) { + if (first_line) { // First line establishes all the barcode sets (can't have any empty barcodes here) std::unordered_set bc_set; bc_set.insert(bc); wbc.push_back(bc_set); wbc[i].reserve(100000); wc_bclen.push_back(barcode.size()); + } else if (barcode == "-") { + i++; + continue; // Empty barcode } else if (i >= wbc.size()) { // Too many barcodes in this line std::cerr << "Error: whitelist file malformed; encountered " << (i+1) - << " barcodes on a line while " << wbc.size() << " barcodes on another line" + << " barcodes on a line while " << wbc.size() << " barcodes on a previous line" << std::endl; exit(1); } else if (barcode.length() != wc_bclen[i]) { @@ -473,12 +476,6 @@ void bustools_correct(Bustools_opt &opt) { i++; } if (i == 0) continue; // empty line - if (i != wbc.size()) { // Incorrect number of barcodes on this line - std::cerr << "Error: whitelist file malformed; encountered " << i - << " barcodes on a line while " << wbc.size() << " barcodes on another line" - << std::endl; - exit(1); - } first_line = false; } wf.close(); From d04a222f400543a35fe21fe0ff803e758656b2a9 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 04:48:59 -0700 Subject: [PATCH 42/49] Update bustools_correct.cpp --- src/bustools_correct.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 22e5054..ff2180d 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -484,8 +484,13 @@ void bustools_correct(Bustools_opt &opt) { std::cerr << "Error: whitelist file malformed; no barcodes found" < 1) { std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; } From 58d547720ab72e357b687c565726caf8293d0a39 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Sun, 23 Apr 2023 18:33:47 -0700 Subject: [PATCH 43/49] bustools correct --replace: initial implementation --- src/Common.hpp | 1 + src/bustools_correct.cpp | 169 +++++++++++++++++++++++++++++++++++++-- src/bustools_main.cpp | 12 ++- 3 files changed, 171 insertions(+), 11 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..b0282cc 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -85,6 +85,7 @@ struct Bustools_opt std::string dump; bool dump_bool = false; bool split_correct = false; + bool barcode_replacement = false; /* predict */ std::string predict_input; //specified the same way as the output for count - count and histogram filenames will be created from this diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index ff2180d..4019889 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "Common.hpp" @@ -416,7 +417,161 @@ void bustools_split_correct(Bustools_opt &opt) p = nullptr; } +void bustools_correct_replace(Bustools_opt &opt) { + uint32_t bclen = 0; + uint32_t umilen = 0; + std::unordered_set wc_bclen; + BUSHeader h; + size_t nr = 0; + size_t 
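// ---------------------------------------------------------------------------
// Aside (not part of the patch): at its core, the --replace mode parsed below
// builds an exact-match table from on-list barcode to replacement barcode and
// rewrites records whose barcode appears in the table. A minimal sketch of
// that lookup step on a simplified record type; names are illustrative, and
// how unmatched records are handled (kept, dropped, or counted as
// uncorrected) is left to the real implementation; here they are simply
// left unchanged.
#include <cstdint>
#include <unordered_map>
#include <vector>

struct BusRec {
  uint64_t barcode;
  uint64_t umi;
  int32_t ec;
  uint32_t count;
};

// Rewrite barcodes that have a replacement; returns how many were replaced.
size_t apply_replacements(std::vector<BusRec> &recs,
                          const std::unordered_map<uint64_t, uint64_t> &rp_map) {
  size_t replaced = 0;
  for (BusRec &r : recs) {
    auto it = rp_map.find(r.barcode);
    if (it != rp_map.end()) {
      r.barcode = it->second; // exact match: swap in the replacement barcode
      ++replaced;
    }
  }
  return replaced;
}
// --- end of aside; the patch text continues below ---------------------------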
N = 100000; + BUSData *p = new BUSData[N]; + char magic[4]; + uint32_t version = 0; + size_t stat_white = 0; + size_t stat_uncorr = 0; + uint64_t old_barcode; + + bool dump_bool = opt.dump_bool; + std::ofstream of; + if (dump_bool) { + of.open(opt.dump); + } + + std::ifstream wf(opt.whitelist, std::ios::in); + std::string line; + line.reserve(100); + std::unordered_map> rp_map; // Replacement map (key = onlisted bc; pair.first = replacement bc; pair.second = type) + uint32_t f = 0; + + while (std::getline(wf, line)) { + std::stringstream ss(line); + std::string barcode; + std::string replacement; + ss >> barcode >> replacement; + if (barcode.empty() || replacement.empty()) continue; + if (!barcode.empty() && replacement.empty()) { + std::cerr << "Error: replacement file malformed; no replacement found for barcode: " << barcode << std::endl; + exit(1); + } + std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); + std::transform(replacement.begin(), replacement.end(), replacement.begin(), ::toupper); + uint64_t bc = stringToBinary(barcode, f); + uint64_t rp = stringToBinary(replacement, f); + rp_map.insert(std::make_pair(bc, std::make_pair(rp,0))); + wc_bclen.insert(barcode.length()); + wc_bclen.insert(replacement.length()); + } + wf.close(); + + if (rp_map.size() == 0) { + std::cerr << "Error: replacement file malformed; no barcodes found" < 1) { std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; } @@ -559,8 +714,8 @@ void bustools_correct(Bustools_opt &opt) { } if (bclen != final_wc_bclen) { - std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << final_wc_bclen << std::endl - << " check that your whitelist matches the technology used" << std::endl; + std::cerr << "Error: barcode length and on-list length differ, barcodes = " << bclen << ", on-list = " << final_wc_bclen << std::endl + << " check that your on-list matches the technology used" << std::endl; exit(1); } @@ -656,7 +811,7 @@ void bustools_correct(Bustools_opt &opt) { } std::cerr << "Processed " << nr << " BUS records" << std::endl - << "In whitelist = " << stat_white << std::endl + << "In on-list = " << stat_white << std::endl << "Corrected = " << stat_corr << std::endl << "Uncorrected = " << stat_uncorr << std::endl; diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index d19d01e..580a810 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -576,13 +576,14 @@ void parse_ProgramOptions_fromtext(int argc, char **argv, Bustools_opt& opt) { void parse_ProgramOptions_correct(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:w:d:sp"; + const char *opt_string = "o:w:d:spr"; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"whitelist", required_argument, 0, 'w'}, {"dump", required_argument, 0, 'd'}, {"split", no_argument, 0, 's'}, {"pipe", no_argument, 0, 'p'}, + {"replace", no_argument, 0, 'r'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -608,6 +609,9 @@ void parse_ProgramOptions_correct(int argc, char **argv, Bustools_opt &opt) case 'p': opt.stream_out = true; break; + case 'r': + opt.barcode_replacement = true; + break; default: break; } @@ -1604,7 +1608,7 @@ bool check_ProgramOptions_correct(Bustools_opt &opt) if (opt.whitelist.size() == 0) { - std::cerr << "Error: Missing whitelist file" << std::endl; + std::cerr << "Error: Missing on-list file" << std::endl; ret = false; } else @@ -2621,10 +2625,10 @@ void Bustools_correct_Usage() << std::endl << "Options: " << 
std::endl << "-o, --output File for corrected bus output" << std::endl - << "-w, --whitelist File of whitelisted barcodes to correct to" << std::endl + << "-w, --whitelist File of on-list barcodes to correct to" << std::endl << "-p, --pipe Write to standard output" << std::endl << "-d, --dump Dump uncorrected to corrected barcodes (optional)" << std::endl - << "-s, --split Split the whitelist and correct each half independently (optional)" << std::endl + << "-r, --replace The file of on-list barcodes is a barcode replacement file" << std::endl << std::endl; } From 61f988349aaf0eaeb99edf6feb9accc7e9dd83b1 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 24 Apr 2023 04:47:16 -0700 Subject: [PATCH 44/49] bustools correct: more features for replace --- src/Common.hpp | 1 + src/bustools_correct.cpp | 88 ++++++++++++++++++++++++++++++++++++---- src/bustools_main.cpp | 7 +++- src/bustools_text.cpp | 3 ++ 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index b0282cc..e6cfb27 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -109,6 +109,7 @@ struct Bustools_opt /* text */ bool text_dumpflags = false; bool text_dumppad = false; + bool text_showall = false; /* linker */ int start, end; diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 4019889..0b2edc2 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -420,7 +420,9 @@ void bustools_split_correct(Bustools_opt &opt) void bustools_correct_replace(Bustools_opt &opt) { uint32_t bclen = 0; uint32_t umilen = 0; + uint32_t rplen = 0; std::unordered_set wc_bclen; + std::unordered_set r_len; BUSHeader h; size_t nr = 0; size_t N = 100000; @@ -430,6 +432,15 @@ void bustools_correct_replace(Bustools_opt &opt) { size_t stat_white = 0; size_t stat_uncorr = 0; uint64_t old_barcode; + enum replacement_type { bc_record, msb_meta, lsb_meta, msb_bus, lsb_bus }; + + // There are five replacement types: + // bc_record: simply replace the barcode of length 'bclen' with another barcode of length 'bclen' (while preserving metadata) + // msb_meta: put replacement (format: NNNN<) into most significant bits of metadata in BUS record (preserve barcode record) + // lsb_meta: put replacement (format: second.first; - bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length + auto rtype = it->second.second; + switch (rtype) { + case bc_record: + { + uint64_t len_mask2 = ((1ULL << (2*std::max(rplen, bclen))) - 1); // n least significant bits where n=2*max(rplen,bclen) [if rplen > bclen, overwrite based on rplen] + bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction plus preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + break; + } + case msb_meta: + { + uint64_t clen = 32-rplen; // 32 minus correction length + bd.barcode = bd.barcode & ((1ULL << (2*clen)) - 1); // Unset the MSBs + bd.barcode = (correction << (2*clen) ) | bd.barcode; // Shift the corrected sequence into the MSBs + break; + } + case lsb_meta: + { + uint64_t original_bc = bd.barcode & len_mask; // The original barcode sequence (no metadata) + bd.barcode = bd.barcode << (2*rplen); // Shift the barcode+metadata to the left based on rplen + uint64_t mlen = (rplen+bclen); // How much space the new metadata plus the original barcode will take up + bd.barcode = bd.barcode & (~((1ULL << (2*mlen)) - 1)); // Preserve only the bits containing the (shifted) metadata + bd.barcode = bd.barcode 
| original_bc; // Throw the original barcode back in + bd.barcode = (correction << (2*bclen) ) | bd.barcode; // Throw the new metadata in + break; + } + case msb_bus: + { + if (bclen >= rplen) { // Only do the substitution if barcode encapsulates the replacement + uint64_t mdata = bd.barcode & (~((1ULL << (2*bclen)) - 1)); // Preserve only the bits containing the metadata (not the barcode) + uint64_t bdata = bd.barcode & ((1ULL << (2*(bclen-rplen))) - 1); // Preserve only the bits containing the part of the barcode we want to keep + bd.barcode = (mdata | bdata) | (correction << (2*(bclen-rplen))); // Merge everything together + } + break; + } + case lsb_bus: + { + bd.barcode = correction | (bd.barcode & (~(1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + break; + } + } bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) { if (b != old_barcode) { diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 580a810..fe85131 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -493,13 +493,14 @@ void parse_ProgramOptions_predict(int argc, char **argv, Bustools_opt& opt) { void parse_ProgramOptions_dump(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:pfd"; + const char *opt_string = "o:pfda"; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"pipe", no_argument, 0, 'p'}, {"flags", no_argument, 0, 'f'}, {"pad", no_argument, 0, 'd'}, + {"showAll", no_argument, 0, 'a'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -521,6 +522,9 @@ void parse_ProgramOptions_dump(int argc, char **argv, Bustools_opt &opt) case 'd': opt.text_dumppad = true; break; + case 'a': + opt.text_showall = true; + break; default: break; } @@ -2607,6 +2611,7 @@ void Bustools_dump_Usage() << "-f, --flags Write the flag column" << std::endl << "-d, --pad Write the pad column" << std::endl << "-p, --pipe Write to standard output" << std::endl + << "-a, --showAll Show hidden metadata in barcodes" << std::endl << std::endl; } diff --git a/src/bustools_text.cpp b/src/bustools_text.cpp index 2c86d3e..9eff0dc 100644 --- a/src/bustools_text.cpp +++ b/src/bustools_text.cpp @@ -46,6 +46,9 @@ void bustools_text(const Bustools_opt& opt) { parseHeader(in, h); uint32_t bclen = h.bclen; uint32_t umilen = h.umilen; + if (opt.text_showall) { + bclen = 32; + } int rc = 0; while (true) { in.read((char*)p, N * sizeof(BUSData)); From f4fd12a5205772eb7e62a04a7ebd8b40835805b8 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 24 Apr 2023 05:22:15 -0700 Subject: [PATCH 45/49] fix minor bug --- src/bustools_correct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 0b2edc2..bf4093a 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -602,7 +602,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } case lsb_bus: { - bd.barcode = correction | (bd.barcode & (~(1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in break; } } From 01f1ac5bee0508159895e88a9010858297550ef2 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 25 Apr 2023 01:25:36 -0700 Subject: [PATCH 46/49] fix bustools correct/replace --- src/bustools_correct.cpp | 103 ++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp 
index bf4093a..f636f49 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -421,8 +421,7 @@ void bustools_correct_replace(Bustools_opt &opt) { uint32_t bclen = 0; uint32_t umilen = 0; uint32_t rplen = 0; - std::unordered_set wc_bclen; - std::unordered_set r_len; + uint32_t wc_bclen = 0; BUSHeader h; size_t nr = 0; size_t N = 100000; @@ -440,7 +439,7 @@ void bustools_correct_replace(Bustools_opt &opt) { // lsb_meta: put replacement (format: > rp_map; // Replacement map (key = onlisted bc; pair.first = replacement bc; pair.second = type) + std::unordered_map rp_map; // Replacement map (key = onlisted bc; value = replacement bc) + replacement_type rtype = bc_record; uint32_t f = 0; while (std::getline(wf, line)) { @@ -466,29 +466,46 @@ void bustools_correct_replace(Bustools_opt &opt) { } std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); std::transform(replacement.begin(), replacement.end(), replacement.begin(), ::toupper); - replacement_type rtype = bc_record; - if (replacement[0] == '<') rtype = lsb_meta; - if (replacement[0] == '*') rtype = lsb_bus; - if (rtype != bc_record) { + replacement_type rtype_ = bc_record; + if (replacement[0] == '<') rtype_ = lsb_meta; + if (replacement[0] == '*') rtype_ = lsb_bus; + if (rtype_ != bc_record) { replacement = replacement.substr(1); } else { - if (replacement[replacement.length()-1] == '<') rtype = msb_meta; - if (replacement[replacement.length()-1] == '*') rtype = msb_bus; - if (rtype != bc_record) { + if (replacement[replacement.length()-1] == '<') rtype_ = msb_meta; + if (replacement[replacement.length()-1] == '*') rtype_ = msb_bus; + if (rtype_ != bc_record) { replacement = replacement.substr(0, replacement.length()-1); } } if (replacement.length() == 0) continue; + + if (wc_bclen == 0) { + rtype = rtype_; + wc_bclen = barcode.length(); + rplen = replacement.length(); + } + if (rtype != rtype_) { + std::cerr << "Error: Replacement types not consistent in file" << std::endl; + exit(1); + } + if (wc_bclen != barcode.length()) { + std::cerr << "Error: Barcode lengths not consistent in file" << std::endl; + exit(1); + } + uint64_t rl = replacement.length(); uint64_t bc = stringToBinary(barcode, f); uint64_t rp = stringToBinary(replacement, f); if (rp_map.find(bc) != rp_map.end()) { std::cerr << "Error: Duplicate entries found: " << barcode << std::endl; exit(1); } - rp_map.insert(std::make_pair(bc, std::make_pair(rp,rtype))); - wc_bclen.insert(barcode.length()); - r_len.insert(replacement.length()); + if (rplen != rl) { + std::cerr << "Error: replacement length in list inconsistent" << std::endl; + exit(1); + } + rp_map.insert(std::make_pair(bc, rp)); } wf.close(); @@ -496,6 +513,7 @@ void bustools_correct_replace(Bustools_opt &opt) { std::cerr << "Error: replacement file malformed; no barcodes found" < rplen) { + h.bclen = rplen; } - rplen = *(r_len.begin()); } if (umilen == 0) { umilen = h.umilen; } + if (!outheader_written) { + writeHeader(bus_out, h); + outheader_written = true; + } + int rc = 0; uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { @@ -562,16 +575,31 @@ void bustools_correct_replace(Bustools_opt &opt) { bd = p[i]; uint64_t b = bd.barcode & len_mask; uint64_t correction = 0; - auto it = rp_map.find(b); + uint64_t b_lookup = b; + if (rtype == lsb_bus || rtype == msb_meta || rtype == lsb_meta) { + // For these, look up based off LSBs (off of wc_bclen) + b_lookup = b_lookup & ((1ULL << (2*wc_bclen)) - 1); + } + if (rtype == 
msb_bus) { + // For this, look up based off MSBs (from beginning of barcode) + if (bclen >= rplen) { + b_lookup = b_lookup & (~(1ULL << (2*(bclen-rplen)))); + } + } + auto it = rp_map.find(b_lookup); if (it != rp_map.end()) { stat_white++; - correction = it->second.first; - auto rtype = it->second.second; + correction = it->second; switch (rtype) { - case bc_record: + case bc_record: // This is the only option where we'll allow replacement to be shorter than barcode { uint64_t len_mask2 = ((1ULL << (2*std::max(rplen, bclen))) - 1); // n least significant bits where n=2*max(rplen,bclen) [if rplen > bclen, overwrite based on rplen] - bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction plus preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + bd.barcode = bd.barcode & ~len_mask2; // Preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + if (rplen < bclen) { + bd.barcode = bd.barcode >> (2*(bclen-rplen)); + bd.barcode = bd.barcode & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. where the correction will eventually be) + } + bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction break; } case msb_meta: @@ -602,7 +630,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } case lsb_bus: { - bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1)); // Set 2*rplen LSBs to 0, and put the new replacement in break; } } @@ -614,7 +642,14 @@ void bustools_correct_replace(Bustools_opt &opt) { } } } else { - bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is + // No correction; except shift metadata right if necessary + if (rplen < bclen) { + uint64_t shifted_bc = bd.barcode >> (2*(bclen-rplen)); + shifted_bc = shifted_bc & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. where the replacement would be) + bd.barcode = (bd.barcode & ((1ULL << (2*(rplen))) - 1)); // Preserve only the LSB rlen stuff + bd.barcode = shifted_bc | bd.barcode; // Merge + } + bus_out.write((char *)&bd, sizeof(bd)); stat_uncorr++; } } From 676f1065c5e3983c1533df99d26328cd50e5a5cc Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 26 Apr 2023 20:01:42 -0700 Subject: [PATCH 47/49] fix bustools correct replace --- src/bustools_correct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index f636f49..ef50271 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -643,7 +643,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } } else { // No correction; except shift metadata right if necessary - if (rplen < bclen) { + if (rtype == bc_record && rplen < bclen) { uint64_t shifted_bc = bd.barcode >> (2*(bclen-rplen)); shifted_bc = shifted_bc & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. 
where the replacement would be) bd.barcode = (bd.barcode & ((1ULL << (2*(rplen))) - 1)); // Preserve only the LSB rlen stuff From 31b90d8fb15b97a6182161580514ff8da0298527 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Wed, 31 May 2023 13:47:24 +0000 Subject: [PATCH 48/49] version bump --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index e6cfb27..badd7c4 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -12,7 +12,7 @@ #include "roaring.hh" #include "hash.hpp" -#define BUSTOOLS_VERSION "0.42.0" +#define BUSTOOLS_VERSION "0.43.0" #define u_map_ std::unordered_map enum CAPTURE_TYPE : char From 7a11c5a2e4fd5369e232050929de8415b9bf49c7 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Thu, 29 Jun 2023 15:41:36 -0700 Subject: [PATCH 49/49] update bustools count prefix to always be len 16 --- src/bustools_count.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 6289545..8ccb6a8 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -702,7 +702,7 @@ void bustools_count(Bustools_opt &opt) { std::ofstream bcprefixof; bcprefixof.open(barcodes_prefix_ofn); for (const auto &x : barcodes) { - bcprefixof << binaryToString(x >> (2*bclen), 32-bclen) << "\n"; + bcprefixof << binaryToString(x >> (2*bclen), 16) << "\n"; // Always make prefix length 16 } bcprefixof.close(); }
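
For orientation, a minimal standalone sketch of the bit arithmetic behind the --replace modes introduced in patches 43-46, assuming the usual 2-bit-per-base packing (A=0, C=1, G=2, T=3, most-significant base first) used by stringToBinary/binaryToString. The helper names below are illustrative, not bustools functions, and the bc_record case shown is the simple one (replacement at least as long as the barcode), without the extra right-shift patch 46 adds for shorter replacements.

// Illustrative sketch only -- not bustools source. Assumes 2-bit-per-base
// packing (A=0, C=1, G=2, T=3), most-significant base first.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>

// Pack a base string into the low bits of a 64-bit word.
static uint64_t pack_bases(const std::string &s) {
  uint64_t r = 0;
  for (char c : s) {
    r <<= 2;
    if (c == 'C') r |= 1; else if (c == 'G') r |= 2; else if (c == 'T') r |= 3;
  }
  return r;
}

// bc_record (replacement at least as long as the barcode): overwrite the low
// 2*max(rplen,bclen) bits with the replacement, preserving any higher metadata bits.
static uint64_t replace_bc_record(uint64_t bc, uint64_t rp, uint32_t bclen, uint32_t rplen) {
  uint64_t mask = (1ULL << (2 * std::max(rplen, bclen))) - 1;
  return rp | (bc & ~mask);
}

// lsb_bus: overwrite only the 2*rplen least significant bits of the field.
static uint64_t replace_lsb_bus(uint64_t bc, uint64_t rp, uint32_t rplen) {
  return rp | (bc & ~((1ULL << (2 * rplen)) - 1));
}

// msb_meta: clear the top 2*rplen bits of the field and shift the replacement
// into them, leaving the low 2*(32-rplen) bits (barcode included) untouched.
static uint64_t replace_msb_meta(uint64_t bc, uint64_t rp, uint32_t rplen) {
  uint32_t keep = 32 - rplen;                   // bases kept in the low bits
  bc &= (1ULL << (2 * keep)) - 1;               // unset the MSBs
  return (rp << (2 * keep)) | bc;               // shift the replacement on top
}

int main() {
  uint32_t bclen = 4, rplen = 4;
  uint64_t bc = pack_bases("ACGT");             // 0x1b
  uint64_t rp = pack_bases("TTTT");             // 0xff
  std::cout << std::hex
            << replace_bc_record(bc, rp, bclen, rplen) << "\n"   // ff
            << replace_lsb_bus(bc, rp, rplen) << "\n"            // ff
            << replace_msb_meta(bc, rp, rplen) << "\n";          // ff0000000000001b
}

In the replacement file itself (per the comments in patches 43-44), a bare second column requests a bc_record swap; a '<' marker places the replacement in the metadata bits and a '*' marker in the barcode bits, with a leading marker selecting the least-significant position (<NNNN, *NNNN) and a trailing marker the most-significant (NNNN<, NNNN*). Patch 46 additionally requires the mode, the barcode length, and the replacement length to be consistent across lines. A run would look roughly like `bustools correct --replace -w replace.txt -o out.bus in.bus` (file names are placeholders; positional-argument handling follows the existing correct subcommand).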
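
Relatedly, for the new -a/--showAll dump flag (patch 44) and the fixed 16-base prefix written by bustools count (patch 49): the sketch below, assuming the same base order as binaryToString, shows how the 64-bit field splits into the bclen-base barcode in the low bits and the 16-base metadata prefix printed from the bits above it. unpack_bases and the variable names are illustrative only, not bustools code.

// Illustrative sketch only -- not bustools source.
#include <cstdint>
#include <iostream>
#include <string>

// Decode len bases from the low 2*len bits, most-significant base first
// (the convention assumed here for binaryToString).
static std::string unpack_bases(uint64_t x, uint32_t len) {
  std::string s(len, 'A');
  for (uint32_t i = 0; i < len; ++i) {
    s[len - 1 - i] = "ACGT"[(x >> (2 * i)) & 3];
  }
  return s;
}

int main() {
  uint32_t bclen = 16;                      // barcode length from the BUS header
  uint64_t field = 0xAAAAAAAA00000000ULL;   // example packed prefix + barcode
  // Low 2*bclen bits: the barcode as bustools text normally prints it.
  std::cout << unpack_bases(field & ((1ULL << (2 * bclen)) - 1), bclen) << "\n";
  // Bits above the barcode: the prefix, always written as 16 bases (patch 49).
  std::cout << unpack_bases(field >> (2 * bclen), 16) << "\n";
  // With --showAll, bustools text treats bclen as 32 and prints both together.
  std::cout << unpack_bases(field, 32) << "\n";
}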