From c99cd82c61879503e99e47b1953c6ba5ee32cfdb Mon Sep 17 00:00:00 2001 From: Yenaled Date: Sun, 22 Jan 2023 13:38:40 -0800 Subject: [PATCH 01/49] Clean up bustools count Also added count split option (but not implemented yet) --- src/Common.hpp | 1 + src/bustools_count.cpp | 381 ++++++++++++----------------------------- src/bustools_main.cpp | 10 ++ 3 files changed, 123 insertions(+), 269 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index f67ee5c..e6d13c7 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -62,6 +62,7 @@ struct Bustools_opt std::string count_genes; std::string count_ecs; std::string count_txp; + std::string count_split; bool count_em = false; bool count_cm = false; bool count_collapse = false; diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e5961e5..244ba45 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -22,7 +22,11 @@ void bustools_count(Bustools_opt &opt) { std::vector> ecmap; std::unordered_map txnames; + auto txnames_split = txnames; // copy parseTranscripts(opt.count_txp, txnames); + if (!opt.count_split.empty()) { + parseTranscripts(opt.count_split, txnames_split); // subset of txnames + } std::vector genemap(txnames.size(), -1); std::unordered_map genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); @@ -437,253 +441,8 @@ void bustools_count(Bustools_opt &opt) { of << n_rows << " " << (x+1) << " " << val << "\n"; } }; - - for (const auto& infn : opt.files) { - std::streambuf *inbuf; - std::ifstream inf; - if (!opt.stream_in) { - inf.open(infn.c_str(), std::ios::binary); - inbuf = inf.rdbuf(); - } else { - inbuf = std::cin.rdbuf(); - } - std::istream in(inbuf); - - parseHeader(in, h); - bclen = h.bclen; - - int rc = 0; - while (true) { - in.read((char*)p, N*sizeof(BUSData)); - size_t rc = in.gcount() / sizeof(BUSData); - nr += rc; - if (rc == 0) { - break; - } - - - for (size_t i = 0; i < rc; i++) { - if (p[i].barcode != current_bc) { - // output whatever is in v - if (!v.empty()) { - if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } - } - v.clear(); - current_bc = p[i].barcode; - } - v.push_back(p[i]); - - } - } - if (!v.empty()) { - if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } - } - - if (!opt.stream_in) { - inf.close(); - } - } - delete[] p; p = nullptr; - - if (!opt.count_collapse) { - n_cols = ecmap.size(); - } else { - n_cols = genenames.size(); - } - - of.close(); - //Rewrite header in a way that works for both Windows and Linux - std::stringstream ss; - ss << n_rows << " " << n_cols << " " << n_entries; - std::string header = ss.str(); - int hlen = header.size(); - header = header + std::string(66 - hlen, ' ') + '\n'; - of.open(mtx_ofn, std::ios::in | std::ios::out); - of << headerComments << header; - of.close(); - - // write updated ec file - h.ecs = std::move(ecmap); - if (!opt.count_collapse) { - writeECs(ec_ofn, h); - } else { - writeGenes(gene_ofn, genenames); - } - // write barcode file - std::ofstream bcof; - bcof.open(barcodes_ofn); - for (const auto &x : barcodes) { - bcof << binaryToString(x, bclen) << "\n"; - } - bcof.close(); - - //write histogram file - if (opt.count_gen_hist) { - std::ofstream histof; - histof.open(hist_ofn); - - for (size_t g = 0; g < genenames.size(); ++g) { - //Indexed as gene*histmax + histIndex - unsigned int offs = g * histmax; - - //first figure out the length of the histogram, don't write that to make the file smaller - unsigned int 
histEnd = histmax - 1; - for (; histEnd != 0; --histEnd) { - if (histograms[offs + histEnd] != 0) { - break; - } - } - for (size_t c = 0; c <= histEnd; ++c) { - if (c != 0) { - histof << '\t'; - } - histof << histograms[offs + c]; - } - - histof << "\n"; - } - histof.close(); - } - - if (opt.count_gen_hist) { - //write mean counts per UMI file (per gene) - - std::ofstream cuof; - cuof.open(cu_ofn); - //write header - cuof << "gene\tCU\tUMIs\n"; - - //prepare gene names for writing - std::vector names; - names.resize(genenames.size()); - for (const auto &x : genenames) { - if (x.second >= 0) { - names[x.second] = x.first; - } - } - - - for (size_t g = 0; g < genenames.size(); ++g) { - //Indexed as gene*histmax + histIndex - unsigned int offs = g * histmax; - - //calculate counts per UMI as the mean of the histogram - double wsum = 0; - double sum = 0; - for (size_t c = 0; c < histmax; ++c) { - wsum += double(c+1) * histograms[offs + c]; - sum += histograms[offs + c]; - } - double cu = wsum/sum; - if (sum == 0) { - cuof << names[g] << '\t' << "NA" << '\t' << sum << '\n'; - } else { - cuof << names[g] << '\t' << cu << '\t' << sum << '\n'; - } - } - cuof.close(); - - //write cu per cell file - - std::ofstream cupcof; - cupcof.open(cu_per_cell_ofn); - //write header - cupcof << "barcode\tCU\tUMIs\n"; - - for (size_t bc = 0; bc < barcodes.size(); ++bc) { - cupcof << binaryToString(barcodes[bc], bclen) << '\t' << double(cellCounts[bc]) / double(cellUMIs[bc]) << '\t' << cellUMIs[bc] << '\n'; - } - cupcof.close(); - } - - - //std::cerr << "bad counts = " << bad_count <<", rescued =" << rescued << ", compacted = " << compacted << std::endl; - - //std::cerr << "Read in " << nr << " BUS records" << std::endl; -} - -void bustools_count_mult(Bustools_opt &opt) { - BUSHeader h; - size_t nr = 0; - size_t N = 100000; - uint32_t bclen = 0; - BUSData* p = new BUSData[N]; - - // read and parse the equivalence class files - - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; - std::vector> ecmap; - - std::unordered_map txnames; - parseTranscripts(opt.count_txp, txnames); - std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; - parseGenes(opt.count_genes, txnames, genemap, genenames); - parseECs(opt.count_ecs, h); - ecmap = std::move(h.ecs); - ecmapinv.reserve(ecmap.size()); - for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); - } - std::vector> ec2genes; - create_ec2genes(ecmap, genemap, ec2genes); - - - std::ofstream of; - std::string mtx_ofn = opt.output + ".mtx"; - std::string barcodes_ofn = opt.output + ".barcodes.txt"; - std::string ec_ofn = opt.output + ".ec.txt"; - std::string gene_ofn = opt.output + ".genes.txt"; - of.open(mtx_ofn); - - // write out the initial header - of << "%%MatrixMarket matrix coordinate real general\n%\n"; - // number of genes - auto mat_header_pos = of.tellp(); - std::string dummy_header(66, '\n'); - for (int i = 0; i < 33; i++) { - dummy_header[2*i] = '%'; - } - of.write(dummy_header.c_str(), dummy_header.size()); - - - size_t n_cols = 0; - size_t n_rows = 0; - size_t n_entries = 0; - std::vector v; - v.reserve(N); - uint64_t current_bc = 0xFFFFFFFFFFFFFFFFULL; - //temporary data - std::vector ecs; - std::vector glist; - ecs.reserve(100); - std::vector u; - u.reserve(100); - std::vector column_v; - std::vector> column_vp; - if (!opt.count_collapse) { - column_vp.reserve(N); - } else { - column_vp.reserve(N); - glist.reserve(100); - } - //barcodes - std::vector barcodes; - int bad_count = 0; - int compacted = 0; - int 
rescued = 0; - - - auto write_barcode_matrix = [&](const std::vector &v) { + auto write_barcode_matrix_mult = [&](const std::vector &v) { if(v.empty()) { return; } @@ -724,7 +483,7 @@ void bustools_count_mult(Bustools_opt &opt) { } }; - auto write_barcode_matrix_collapsed = [&](const std::vector &v) { + auto write_barcode_matrix_collapsed_mult = [&](const std::vector &v) { if(v.empty()) { return; } @@ -734,7 +493,7 @@ void bustools_count_mult(Bustools_opt &opt) { barcodes.push_back(v[0].barcode); double val = 0.0; size_t n = v.size(); - + for (size_t i = 0; i < n; i++) { ecs.resize(0); ecs.push_back(v[i].ec); @@ -753,12 +512,12 @@ void bustools_count_mult(Bustools_opt &opt) { } } } - + std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); std::unordered_map col_map(m); std::vector cols; - + for (size_t i = 0; i < m; ) { size_t j = i+1; double val = column_vp[i].second; @@ -770,14 +529,12 @@ void bustools_count_mult(Bustools_opt &opt) { } col_map.insert({column_vp[i].first,val}); cols.push_back(column_vp[i].first); - + n_entries++; i = j; // increment } - - - + for (const auto &x : cols) { double val = 0; auto it = col_map.find(x); @@ -786,7 +543,7 @@ void bustools_count_mult(Bustools_opt &opt) { } of << n_rows << " " << (x+1) << " " << val << "\n"; } - + }; for (const auto& infn : opt.files) { @@ -818,11 +575,13 @@ void bustools_count_mult(Bustools_opt &opt) { // output whatever is in v if (!v.empty()) { if (!opt.count_collapse) { - write_barcode_matrix(v); - } else { - write_barcode_matrix_collapsed(v); - } + if (!opt.count_cm) write_barcode_matrix(v); + else write_barcode_matrix_mult(v); + } else { + if (!opt.count_cm) write_barcode_matrix_collapsed(v); + else write_barcode_matrix_collapsed_mult(v); } + } v.clear(); current_bc = p[i].barcode; } @@ -832,9 +591,11 @@ void bustools_count_mult(Bustools_opt &opt) { } if (!v.empty()) { if (!opt.count_collapse) { - write_barcode_matrix(v); + if (!opt.count_cm) write_barcode_matrix(v); + else write_barcode_matrix_mult(v); } else { - write_barcode_matrix_collapsed(v); + if (!opt.count_cm) write_barcode_matrix_collapsed(v); + else write_barcode_matrix_collapsed_mult(v); } } @@ -852,17 +613,14 @@ void bustools_count_mult(Bustools_opt &opt) { of.close(); + //Rewrite header in a way that works for both Windows and Linux std::stringstream ss; - ss << n_rows << " " << n_cols << " " << n_entries << "\n"; + ss << n_rows << " " << n_cols << " " << n_entries; std::string header = ss.str(); int hlen = header.size(); - assert(hlen < 66); - of.open(mtx_ofn, std::ios::binary | std::ios::in | std::ios::out); - of.seekp(mat_header_pos); - of.write("%",1); - of.write(std::string(66-hlen-2,' ').c_str(),66-hlen-2); - of.write("\n",1); - of.write(header.c_str(), hlen); + header = header + std::string(66 - hlen, ' ') + '\n'; + of.open(mtx_ofn, std::ios::in | std::ios::out); + of << headerComments << header; of.close(); // write updated ec file @@ -879,7 +637,92 @@ void bustools_count_mult(Bustools_opt &opt) { bcof << binaryToString(x, bclen) << "\n"; } bcof.close(); + + //write histogram file + if (opt.count_gen_hist) { + std::ofstream histof; + histof.open(hist_ofn); + + for (size_t g = 0; g < genenames.size(); ++g) { + //Indexed as gene*histmax + histIndex + unsigned int offs = g * histmax; + + //first figure out the length of the histogram, don't write that to make the file smaller + unsigned int histEnd = histmax - 1; + for (; histEnd != 0; --histEnd) { + if (histograms[offs + histEnd] != 0) { + break; + } + } + for (size_t c = 0; c <= 
histEnd; ++c) { + if (c != 0) { + histof << '\t'; + } + histof << histograms[offs + c]; + } + + histof << "\n"; + } + histof.close(); + } + + if (opt.count_gen_hist) { + //write mean counts per UMI file (per gene) + + std::ofstream cuof; + cuof.open(cu_ofn); + //write header + cuof << "gene\tCU\tUMIs\n"; + + //prepare gene names for writing + std::vector names; + names.resize(genenames.size()); + for (const auto &x : genenames) { + if (x.second >= 0) { + names[x.second] = x.first; + } + } + + + for (size_t g = 0; g < genenames.size(); ++g) { + //Indexed as gene*histmax + histIndex + unsigned int offs = g * histmax; + + //calculate counts per UMI as the mean of the histogram + double wsum = 0; + double sum = 0; + for (size_t c = 0; c < histmax; ++c) { + wsum += double(c+1) * histograms[offs + c]; + sum += histograms[offs + c]; + } + double cu = wsum/sum; + if (sum == 0) { + cuof << names[g] << '\t' << "NA" << '\t' << sum << '\n'; + } else { + cuof << names[g] << '\t' << cu << '\t' << sum << '\n'; + } + } + cuof.close(); + + //write cu per cell file + + std::ofstream cupcof; + cupcof.open(cu_per_cell_ofn); + //write header + cupcof << "barcode\tCU\tUMIs\n"; + + for (size_t bc = 0; bc < barcodes.size(); ++bc) { + cupcof << binaryToString(barcodes[bc], bclen) << '\t' << double(cellCounts[bc]) / double(cellUMIs[bc]) << '\t' << cellUMIs[bc] << '\n'; + } + cupcof.close(); + } + + //std::cerr << "bad counts = " << bad_count <<", rescued =" << rescued << ", compacted = " << compacted << std::endl; //std::cerr << "Read in " << nr << " BUS records" << std::endl; } + +void bustools_count_mult(Bustools_opt &opt) { + bustools_count(opt); +} diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index ab954a8..853bcd2 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1770,6 +1770,15 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } + if (opt.count_split.size() != 0) + { + if (!checkFileExists(opt.count_split)) + { + std::cerr << "Error: File not found " << opt.count_split << std::endl; + ret = false; + } + } + return ret; } @@ -2657,6 +2666,7 @@ void Bustools_count_Usage() << " --umi-gene Perform gene-level collapsing of UMIs" << std::endl << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl + << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 26999ad51f8d9959b1e32c85c5999dbbf6f36fe7 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 10:16:06 -0800 Subject: [PATCH 02/49] Preliminary untested count split matrix option --- src/Common.cpp | 27 ++++++ src/Common.hpp | 8 ++ src/bustools_count.cpp | 212 +++++++++++++++++++++++------------------ src/bustools_main.cpp | 4 + 4 files changed, 160 insertions(+), 91 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index ad96c96..717ee24 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -327,6 +327,33 @@ void create_ec2genes(const std::vector> &ecmap, const std:: } } +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split) { + if (tx_split.size() == 0) return COUNT_DEFAULT; + std::vector ecs; + ecs.push_back(ec); + return intersect_ecs_with_subset_txs(ecs, 
ecmap, tx_split); +} + +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split) { + if (tx_split.size() == 0) return COUNT_DEFAULT; + if (ecs.size() == 0) return COUNT_AMBIGUOUS; // Shouldn't happen + size_t n_1 = 0; + size_t n_2 = 0; + for (auto ec : ecs) { // We still need to optimize this + for (auto t: ecmap[ec]) { + if(std::find(tx_split.begin(), tx_split.end(), t) != tx_split.end()) { + n_2++; + } else { + n_1++; + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); +} + + void copy_file(std::string src, std::string dest) { std::ifstream isrc(src, std::ios::binary); std::ofstream idest(dest, std::ios::binary); diff --git a/src/Common.hpp b/src/Common.hpp index e6d13c7..006dd80 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -35,6 +35,12 @@ enum PROJECT_TYPE : char PROJECT_TX, PROJECT_F }; +enum COUNT_MTX_TYPE : char +{ + COUNT_DEFAULT = 0, + COUNT_SPLIT, + COUNT_AMBIGUOUS +}; struct Bustools_opt { @@ -168,6 +174,8 @@ void vt2gene(const std::vector &v, const std::vector &genemap, void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); +COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); void copy_file(std::string src, std::string dest); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 244ba45..e0d787c 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -23,10 +23,10 @@ void bustools_count(Bustools_opt &opt) { std::unordered_map txnames; auto txnames_split = txnames; // copy + std::vector tx_split; + tx_split.reserve(txnames_split.size()); + for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); - if (!opt.count_split.empty()) { - parseTranscripts(opt.count_split, txnames_split); // subset of txnames - } std::vector genemap(txnames.size(), -1); std::unordered_map genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); @@ -40,15 +40,19 @@ void bustools_count(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); + bool count_split = !opt.count_split.empty(); std::ofstream of; + std::ofstream of_2; + std::ofstream of_A; std::string mtx_ofn = opt.output + ".mtx"; + std::string mtx_ofn_split_2 = opt.output + ".2.mtx"; + std::string mtx_ofn_split_A = opt.output + ".ambiguous.mtx"; std::string barcodes_ofn = opt.output + ".barcodes.txt"; std::string ec_ofn = opt.output + ".ec.txt"; std::string gene_ofn = opt.output + ".genes.txt"; std::string hist_ofn = opt.output + ".hist.txt"; std::string cu_per_cell_ofn = opt.output + ".CUPerCell.txt"; std::string cu_ofn = opt.output + ".cu.txt"; - of.open(mtx_ofn); // write out the initial header // keep the number of newlines constant, this way it will work for both Windows and Linux @@ -58,12 +62,22 @@ void bustools_count(Bustools_opt &opt) { ssHeader << headerComments; 
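// A minimal, self-contained sketch of the classification rule behind the new
// -s/--split option: a record whose transcripts all fall outside the supplied
// subset is counted in the default matrix, one whose transcripts all fall
// inside goes to the split (.2) matrix, and a record touching both goes to the
// ambiguous matrix. It mirrors intersect_ecs_with_subset_txs() above, but the
// names here (classify_txs, the sample ids) are illustrative only, and
// std::binary_search assumes a sorted subset where the patch itself uses a
// linear std::find.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

enum SPLIT_CLASS : char { CLASS_DEFAULT = 0, CLASS_SPLIT, CLASS_AMBIGUOUS };

// txs: transcript ids supporting one barcode/UMI record
// tx_split: sorted transcript ids taken from the file passed to -s/--split
SPLIT_CLASS classify_txs(const std::vector<int32_t>& txs,
                         const std::vector<int32_t>& tx_split) {
  if (tx_split.empty()) return CLASS_DEFAULT;  // no split requested
  bool in_default = false, in_split = false;
  for (int32_t t : txs) {
    if (std::binary_search(tx_split.begin(), tx_split.end(), t)) in_split = true;
    else in_default = true;
    if (in_default && in_split) return CLASS_AMBIGUOUS;  // stop early, as above
  }
  return in_split ? CLASS_SPLIT : CLASS_DEFAULT;
}

int main() {
  const std::vector<int32_t> tx_split = {10, 11, 12};  // subset from --split
  assert(classify_txs({1, 2}, tx_split) == CLASS_DEFAULT);
  assert(classify_txs({10, 12}, tx_split) == CLASS_SPLIT);
  assert(classify_txs({2, 11}, tx_split) == CLASS_AMBIGUOUS);
  return 0;
}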
ssHeader << std::string(66, '%') << '\n'; size_t headerLength = ssHeader.str().length(); + // If we need to split matrix + if (count_split) { + parseTranscripts(opt.count_split, txnames_split); // subset of txnames + of_2.open(mtx_ofn_split_2); + of_A.open(mtx_ofn_split_A); + of_2 << ssHeader.str(); + of_A << ssHeader.str(); + } + of.open(mtx_ofn); of << ssHeader.str(); - size_t n_cols = 0; size_t n_rows = 0; size_t n_entries = 0; + size_t n_entries_2 = 0; + size_t n_entries_A = 0; std::vector v; v.reserve(N); uint64_t current_bc = 0xFFFFFFFFFFFFFFFFULL; @@ -74,7 +88,7 @@ void bustools_count(Bustools_opt &opt) { std::vector u; u.reserve(100); std::vector column_v; - std::vector> column_vp; + std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { column_v.reserve(N); } else { @@ -217,8 +231,11 @@ void bustools_count(Bustools_opt &opt) { } } double val = j-i; - of << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; - n_entries++; + auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split); + auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); + auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); + of_ << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; + n_entries_++; i = j; // increment } @@ -239,7 +256,7 @@ void bustools_count(Bustools_opt &opt) { std::vector> ambiguous_genes; - for (size_t i = 0; i < n; ) { + if (!opt.count_cm) for (size_t i = 0; i < n; ) { // Entire loop is for !opt.count_cm size_t j = i+1; for (; j < n; j++) { if (v[i].UMI != v[j].UMI) { @@ -270,9 +287,10 @@ void bustools_count(Bustools_opt &opt) { } } if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); if (opt.count_gene_multimapping) { for (auto x : glist) { - column_vp.push_back({x, (opt.count_raw_counts ? counts : 1.0)/gn}); + column_vp.push_back({x, {(opt.count_raw_counts ? counts : 1.0)/gn, which_mtx}}); } //Fill in histograms for prediction. if (opt.count_gen_hist) { @@ -288,7 +306,7 @@ void bustools_count(Bustools_opt &opt) { } } else { if (gn==1) { - column_vp.push_back({glist[0],opt.count_raw_counts ? counts : 1.0}); + column_vp.push_back({glist[0],{opt.count_raw_counts ? counts : 1.0, which_mtx}}); //Fill in histograms for prediction. 
if (opt.count_gen_hist) { if (glist[0] < n_genes) { //crasches with an invalid gene file otherwise @@ -334,13 +352,14 @@ void bustools_count(Bustools_opt &opt) { } gn = glist.size(); if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split); if (opt.count_gene_multimapping) { for (auto x : glist) { - column_vp.push_back({x, 1.0/gn}); + column_vp.push_back({x, {1.0/gn, which_mtx}}); } } else { if (gn==1) { - column_vp.push_back({glist[0],1.0}); + column_vp.push_back({glist[0],{1.0, which_mtx}}); } else if (opt.count_em) { ambiguous_genes.push_back(std::move(glist)); } @@ -349,25 +368,64 @@ void bustools_count(Bustools_opt &opt) { } } i = j; // increment + } else for (size_t i = 0; i < n; i++) { // Entire loop is for opt.count_cm + ecs.resize(0); + ecs.push_back(v[i].ec); + + intersect_genes_of_ecs(ecs, ec2genes, glist); + int gn = glist.size(); + if (gn > 0) { + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + if (opt.count_gene_multimapping) { + for (auto x : glist) { + column_vp.push_back({x, {v[i].count/gn, which_mtx}}); + } + } else { + if (gn==1) { + column_vp.push_back({glist[0],{v[i].count, which_mtx}}); + } + } + } } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); std::unordered_map col_map(m); + auto col_map_2 = col_map; // copy + auto col_map_A = col_map; // copy std::vector cols; for (size_t i = 0; i < m; ) { size_t j = i+1; - double val = column_vp[i].second; + double val = 0; + double val_2 = 0; + double val_A = 0; + auto mtx_type = column_vp[i].second.second; + if (mtx_type == COUNT_DEFAULT) val = column_vp[i].second.first; + else if (mtx_type == COUNT_SPLIT) val_2 = column_vp[i].second.first; + else val_A = column_vp[i].second.first; for (; j < m; j++) { if (column_vp[i].first != column_vp[j].first) { break; } - val += column_vp[j].second; + auto mtx_type = column_vp[j].second.second; + if (mtx_type == COUNT_DEFAULT) val += column_vp[j].second.first; + else if (mtx_type == COUNT_SPLIT) val_2 += column_vp[j].second.first; + else val_A += column_vp[j].second.first; } col_map.insert({column_vp[i].first,val}); + if (count_split) { + col_map_2.insert({column_vp[i].first,val_2}); + col_map_A.insert({column_vp[i].first,val_A}); + } cols.push_back(column_vp[i].first); - n_entries++; + if (count_split) { + if (val > 0) n_entries++; + if (val_2 > 0) n_entries_2++; + if (val_A > 0) n_entries_A++; + } else { + n_entries++; + } i = j; // increment } @@ -380,7 +438,7 @@ void bustools_count(Bustools_opt &opt) { double val = 0; auto it = col_map.find(x); if (it != col_map.end()) { - val = it->second; + val = it->second.first; } c1.insert({x,val}); c2.insert({x,0.0}); @@ -430,15 +488,28 @@ void bustools_count(Bustools_opt &opt) { } - - for (const auto &x : cols) { double val = 0; auto it = col_map.find(x); - if (it != col_map.end()) { - val = it->second; + if (!count_split) { + if (it != col_map.end()) val = it->second; + of << n_rows << " " << (x+1) << " " << val << "\n"; + } else { + if (it != col_map.end()) { + val = it->second; + of << n_rows << " " << (x+1) << " " << val << "\n"; + } + it = col_map_2.find(x); + if (it != col_map_2.end()) { + val = it->second; + of_2 << n_rows << " " << (x+1) << " " << val << "\n"; + } + it = col_map_A.find(x); + if (it != col_map_A.end()) { + val = it->second; + of_A << n_rows << " " << (x+1) << " " << val << "\n"; + } } - of << n_rows << " " << (x+1) << " " << val << "\n"; } }; @@ -464,7 +535,7 @@ void bustools_count(Bustools_opt &opt) { continue; } } - 
column_vp.push_back({ec,v[i].count}); + column_vp.push_back({ec,{v[i].count,0}}); } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); @@ -477,74 +548,14 @@ void bustools_count(Bustools_opt &opt) { } val += column_vp[j].second; } - n_entries++; - of << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; + auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); + auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); + auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); + of_ << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; + n_entries_++; i = j; // increment } }; - - auto write_barcode_matrix_collapsed_mult = [&](const std::vector &v) { - if(v.empty()) { - return; - } - column_vp.resize(0); - n_rows+= 1; - - barcodes.push_back(v[0].barcode); - double val = 0.0; - size_t n = v.size(); - - for (size_t i = 0; i < n; i++) { - ecs.resize(0); - ecs.push_back(v[i].ec); - - intersect_genes_of_ecs(ecs, ec2genes, glist); - int gn = glist.size(); - if (gn > 0) { - if (opt.count_gene_multimapping) { - for (auto x : glist) { - column_vp.push_back({x, v[i].count/gn}); - } - } else { - if (gn==1) { - column_vp.push_back({glist[0],v[i].count}); - } - } - } - } - - std::sort(column_vp.begin(), column_vp.end()); - size_t m = column_vp.size(); - std::unordered_map col_map(m); - std::vector cols; - - for (size_t i = 0; i < m; ) { - size_t j = i+1; - double val = column_vp[i].second; - for (; j < m; j++) { - if (column_vp[i].first != column_vp[j].first) { - break; - } - val += column_vp[j].second; - } - col_map.insert({column_vp[i].first,val}); - cols.push_back(column_vp[i].first); - - n_entries++; - - i = j; // increment - } - - for (const auto &x : cols) { - double val = 0; - auto it = col_map.find(x); - if (it != col_map.end()) { - val = it->second; - } - of << n_rows << " " << (x+1) << " " << val << "\n"; - } - - }; for (const auto& infn : opt.files) { std::streambuf *inbuf; @@ -578,8 +589,7 @@ void bustools_count(Bustools_opt &opt) { if (!opt.count_cm) write_barcode_matrix(v); else write_barcode_matrix_mult(v); } else { - if (!opt.count_cm) write_barcode_matrix_collapsed(v); - else write_barcode_matrix_collapsed_mult(v); + write_barcode_matrix_collapsed(v); // Same signature for count_cm and !count_cm } } v.clear(); @@ -612,9 +622,13 @@ void bustools_count(Bustools_opt &opt) { } of.close(); + if (count_split) { + of_2.close(); + of_A.close(); + } //Rewrite header in a way that works for both Windows and Linux - std::stringstream ss; + std::stringstream ss, ss_2, ss_A; ss << n_rows << " " << n_cols << " " << n_entries; std::string header = ss.str(); int hlen = header.size(); @@ -622,6 +636,22 @@ void bustools_count(Bustools_opt &opt) { of.open(mtx_ofn, std::ios::in | std::ios::out); of << headerComments << header; of.close(); + if (count_split) { + ss_2 << n_rows << " " << n_cols << " " << n_entries_2; + header = ss_2.str(); + hlen = header.size(); + header = header + std::string(66 - hlen, ' ') + '\n'; + of_2.open(mtx_ofn_split_2, std::ios::in | std::ios::out); + of_2 << headerComments << header; + of_2.close(); + ss_A << n_rows << " " << n_cols << " " << n_entries_A; + header = ss_A.str(); + hlen = header.size(); + header = header + std::string(66 - hlen, ' ') + '\n'; + of_A.open(mtx_ofn_split_A, std::ios::in | std::ios::out); + of_A << headerComments << header; + of_A.close(); + } // write updated ec file h.ecs = 
std::move(ecmap); diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 853bcd2..2ffd492 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1777,6 +1777,10 @@ bool check_ProgramOptions_count(Bustools_opt &opt) std::cerr << "Error: File not found " << opt.count_split << std::endl; ret = false; } + if (opt.count_em) { + std::cerr << "Cannot use -s with --em" std::endl; + ret = false; + } } return ret; From 53b0acc550c9b74289b84faf60489fdfd2998f62 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 11:52:31 -0800 Subject: [PATCH 03/49] fix some typos --- src/bustools_count.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d787c..68e9bab 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -438,7 +438,7 @@ void bustools_count(Bustools_opt &opt) { double val = 0; auto it = col_map.find(x); if (it != col_map.end()) { - val = it->second.first; + val = it->second; } c1.insert({x,val}); c2.insert({x,0.0}); @@ -535,18 +535,18 @@ void bustools_count(Bustools_opt &opt) { continue; } } - column_vp.push_back({ec,{v[i].count,0}}); + column_vp.push_back({ec,{v[i].count,COUNT_DEFAULT}}); } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); for (size_t i = 0; i < m; ) { size_t j = i+1; - double val = column_vp[i].second; + double val = column_vp[i].second.first; for (; j < m; j++) { if (column_vp[i].first != column_vp[j].first) { break; } - val += column_vp[j].second; + val += column_vp[j].second.first; } auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); @@ -557,7 +557,7 @@ void bustools_count(Bustools_opt &opt) { } }; - for (const auto& infn : opt.files) { + for (const auto& infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; if (!opt.stream_in) { @@ -604,8 +604,7 @@ void bustools_count(Bustools_opt &opt) { if (!opt.count_cm) write_barcode_matrix(v); else write_barcode_matrix_mult(v); } else { - if (!opt.count_cm) write_barcode_matrix_collapsed(v); - else write_barcode_matrix_collapsed_mult(v); + write_barcode_matrix_collapsed(v); // Same signature for count_cm and !count_cm } } From 0d8cec30ffbcf983685e944324abc9cd4740c997 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 11:53:33 -0800 Subject: [PATCH 04/49] fix another typo --- src/bustools_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 2ffd492..384dc2f 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -1778,7 +1778,7 @@ bool check_ProgramOptions_count(Bustools_opt &opt) ret = false; } if (opt.count_em) { - std::cerr << "Cannot use -s with --em" std::endl; + std::cerr << "Cannot use -s with --em" << std::endl; ret = false; } } From a2f38379bdc606effd27d5113c290f382be23f20 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:32:53 -0800 Subject: [PATCH 05/49] change unordered_map --- src/BUSData.cpp | 10 +- src/BUSData.h | 11 +- src/Common.hpp | 6 +- src/bustools_capture.cpp | 4 +- src/bustools_clusterhist.cpp | 8 +- src/bustools_collapse.cpp | 6 +- src/bustools_count.cpp | 10 +- src/bustools_inspect.cpp | 4 +- src/bustools_mash.cpp | 6 +- src/bustools_merge.cpp | 4 +- src/bustools_project.cpp | 12 +- src/bustools_umicorrect.cpp | 6 +- src/robin_hood.h | 2544 ++++++++++++++++++++++++++++++++++ 13 files changed, 2589 insertions(+), 42 
deletions(-) create mode 100644 src/robin_hood.h diff --git a/src/BUSData.cpp b/src/BUSData.cpp index c97f80f..397e6dd 100644 --- a/src/BUSData.cpp +++ b/src/BUSData.cpp @@ -258,7 +258,7 @@ bool writeECs(const std::string &filename, const BUSHeader &header) { return true; } -bool writeGenes(const std::string &filename, const std::unordered_map &genenames) { +bool writeGenes(const std::string &filename, const u_map_ &genenames) { std::ofstream outf; outf.open(filename.c_str(), std::ios::out); @@ -279,7 +279,7 @@ bool writeGenes(const std::string &filename, const std::unordered_map &txnames) { +bool parseTranscripts(const std::string &filename, u_map_ &txnames) { std::ifstream inf(filename.c_str()); int i = 0; @@ -291,7 +291,7 @@ bool parseTranscripts(const std::string &filename, std::unordered_map &txnames, std::unordered_set &captures) { +bool parseTxCaptureList(const std::string &filename, u_map_ &txnames, std::unordered_set &captures) { std::ifstream inf(filename.c_str()); std::string txp; @@ -318,7 +318,7 @@ bool parseBcUmiCaptureList(const std::string &filename, std::unordered_set &project_map) { +bool parse_ProjectMap(const std::string &filename, u_map_ &project_map) { // This function occurs in 3 places: here, BUSData.h, and bustools_project.cpp std::ifstream inf(filename.c_str()); @@ -346,7 +346,7 @@ bool parseFlagsCaptureList(const std::string &filename, std::unordered_set &txnames, std::vector &genemap, std::unordered_map &genenames) { +bool parseGenes(const std::string &filename, const u_map_ &txnames, std::vector &genemap, u_map_ &genenames) { std::ifstream inf(filename.c_str()); std::string line, t; diff --git a/src/BUSData.h b/src/BUSData.h index 5c697f7..227f889 100644 --- a/src/BUSData.h +++ b/src/BUSData.h @@ -7,6 +7,7 @@ #include #include #include +#include "Common.hpp" const uint32_t BUSFORMAT_VERSION = 1; @@ -66,15 +67,15 @@ int identifyParseHeader(std::istream &inf, BUSHeader &header, compressed_BUSHead bool parseECs_stream(std::istream &in, BUSHeader &header); bool parseECs(const std::string &filename, BUSHeader &header); bool writeECs(const std::string &filename, const BUSHeader &header); -bool writeGenes(const std::string &filename, const std::unordered_map &genenames); -bool parseGenes(const std::string &filename, const std::unordered_map &txnames, std::vector &genemap, std::unordered_map &genenames); +bool writeGenes(const std::string &filename, const u_map_ &genenames); +bool parseGenes(const std::string &filename, const u_map_ &txnames, std::vector &genemap, u_map_ &genenames); bool parseGenesList(const std::string& filename, std::vector& geneNames); -bool parseTxCaptureList(const std::string &filename, std::unordered_map &txnames, std::unordered_set &captures); +bool parseTxCaptureList(const std::string &filename, u_map_ &txnames, std::unordered_set &captures); bool parseBcUmiCaptureList(const std::string &filename, std::unordered_set &captures); bool parseFlagsCaptureList(const std::string &filename, std::unordered_set &captures); -bool parseTranscripts(const std::string &filename, std::unordered_map &txnames); +bool parseTranscripts(const std::string &filename, u_map_ &txnames); -bool parse_ProjectMap(const std::string &filename, std::unordered_map &project_map); +bool parse_ProjectMap(const std::string &filename, u_map_ &project_map); uint64_t stringToBinary(const std::string &s, uint32_t &flag); uint64_t stringToBinary(const char* s, const size_t len, uint32_t &flag); diff --git a/src/Common.hpp b/src/Common.hpp index 006dd80..940a4a8 100644 --- 
a/src/Common.hpp +++ b/src/Common.hpp @@ -9,9 +9,11 @@ #include #include #include +#include "robin_hood.h" #define BUSTOOLS_VERSION "0.42.0" +typedef robin_hood::unordered_flat_map u_map_; enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -169,10 +171,10 @@ struct SortedVectorHasher std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); diff --git a/src/bustools_capture.cpp b/src/bustools_capture.cpp index d03e1fd..a90ff2e 100644 --- a/src/bustools_capture.cpp +++ b/src/bustools_capture.cpp @@ -12,11 +12,11 @@ void bustools_capture(Bustools_opt &opt) { std::unordered_set captures; std::vector> ecmap; - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; if (opt.type == CAPTURE_TX) { // parse ecmap and capture list - std::unordered_map txnames; + u_map_ txnames; std::cerr << "Parsing transcripts .. 
"; std::cerr.flush(); parseTranscripts(opt.count_txp, txnames); std::cerr << "done" << std::endl; diff --git a/src/bustools_clusterhist.cpp b/src/bustools_clusterhist.cpp index 2bbb667..591eeeb 100644 --- a/src/bustools_clusterhist.cpp +++ b/src/bustools_clusterhist.cpp @@ -18,13 +18,13 @@ void bustools_clusterhist(Bustools_opt& opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -52,7 +52,7 @@ void bustools_clusterhist(Bustools_opt& opt) { //Read the cluster file std::vector clusterNames; - std::unordered_map bcClusters; + u_map_ bcClusters; { std::ifstream ifs(opt.cluster_input_file); uint32_t flag = 0; diff --git a/src/bustools_collapse.cpp b/src/bustools_collapse.cpp index 2111c9b..120d0ef 100644 --- a/src/bustools_collapse.cpp +++ b/src/bustools_collapse.cpp @@ -17,13 +17,13 @@ void bustools_collapse(Bustools_opt &opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 68e9bab..b4a367f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -18,17 +18,17 @@ void bustools_count(Bustools_opt &opt) { // read and parse the equivalence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; auto txnames_split = txnames; // copy std::vector tx_split; tx_split.reserve(txnames_split.size()); for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -389,7 +389,7 @@ void bustools_count(Bustools_opt &opt) { } std::sort(column_vp.begin(), column_vp.end()); size_t m = column_vp.size(); - std::unordered_map col_map(m); + u_map_ col_map(m); auto col_map_2 = col_map; // copy auto col_map_A = col_map; // copy std::vector cols; @@ -432,7 +432,7 @@ void bustools_count(Bustools_opt &opt) { if (opt.count_em) { //std::cerr << "Running EM algorithm" << std::endl; - std::unordered_map c1,c2; + u_map_ c1,c2; // initialize with unique counts for (const auto &x : cols) { double val = 0; diff --git a/src/bustools_inspect.cpp b/src/bustools_inspect.cpp index 25694cf..2808474 100644 --- a/src/bustools_inspect.cpp +++ b/src/bustools_inspect.cpp @@ -100,7 +100,7 @@ void bustools_inspect(Bustools_opt &opt) { int64_t gt_records = 0; /* Frequency of number of targets per set, with multiplicity. */ - std::unordered_map freq_targetsPerSet; + u_map_ freq_targetsPerSet; /* Frequency of targets (for Good-Toulmin). 
*/ std::vector freq_targets(numTargets, 0); @@ -273,7 +273,7 @@ void bustools_inspect(Bustools_opt &opt) { // Good-Toulmin for number of targets // Also number of targets detected uint64_t targetsDetected = 0; - std::unordered_map freq_freq_targets; + u_map_ freq_freq_targets; for (const auto &elt : freq_targets) { if (elt) { ++targetsDetected; diff --git a/src/bustools_mash.cpp b/src/bustools_mash.cpp index d9c3d17..8faa65d 100644 --- a/src/bustools_mash.cpp +++ b/src/bustools_mash.cpp @@ -12,7 +12,7 @@ #include "bustools_merge.h" -inline std::vector get_tids(const BUSHeader &oh, const std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, const int32_t &eid) +inline std::vector get_tids(const BUSHeader &oh, const u_map_, int32_t, SortedVectorHasher> &ecmapinv, const int32_t &eid) { std::vector tids = oh.ecs[eid]; @@ -56,7 +56,7 @@ void bustools_mash(const Bustools_opt &opt) std::cerr << "[info] parsed output.bus files" << std::endl; // parse the transcripts.txt - std::unordered_map txn_tid; + u_map_ txn_tid; std::vector> tids_per_file; // list of tids as they occur for each file std::vector tids; // a vector of tids int32_t tid = 0; @@ -101,7 +101,7 @@ void bustools_mash(const Bustools_opt &opt) oh.bclen = vh[0].bclen; oh.umilen = vh[0].umilen; - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; // set{tids} (ec) to eid it came from + u_map_, int32_t, SortedVectorHasher> ecmapinv; // set{tids} (ec) to eid it came from for (int32_t i = 0; i < tid; i++) { diff --git a/src/bustools_merge.cpp b/src/bustools_merge.cpp index f330efe..c89eae3 100644 --- a/src/bustools_merge.cpp +++ b/src/bustools_merge.cpp @@ -57,7 +57,7 @@ void bustools_merge_different_index(const Bustools_opt &opt) std::ifstream ifn(opt.count_txp); std::string txn; int32_t tid; - std::unordered_map txn_tid; + u_map_ txn_tid; std::vector tids; // insert tids into a vector @@ -81,7 +81,7 @@ void bustools_merge_different_index(const Bustools_opt &opt) BUSHeader h, bh; parseECs(opt.count_ecs, h); // put the ecs into a ecmap inv - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; for (std::size_t ec = 0; ec < h.ecs.size(); ec++) { diff --git a/src/bustools_project.cpp b/src/bustools_project.cpp index c5b1979..6d1e195 100644 --- a/src/bustools_project.cpp +++ b/src/bustools_project.cpp @@ -20,7 +20,7 @@ void bustools_project(Bustools_opt &opt) { size_t stat_map = 0; size_t stat_unmap = 0; - std::unordered_map project_map; + u_map_ project_map; /* Load the map into project_map variable parse bus records and map each object (barcode, umi) with project_map @@ -175,18 +175,18 @@ void bustools_project(Bustools_opt &opt) { } if (opt.type == PROJECT_TX) { std::ofstream of; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.map, txnames, genemap, genenames); std::vector genenamesinv(genenames.size(), ""); for (const auto &gene : genenames) { genenamesinv[gene.second] = gene.first; } - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); @@ -199,7 +199,7 @@ void bustools_project(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); std::vector> geneEc2genes = ec2genes; - std::unordered_map, int32_t, SortedVectorHasher> geneEc2genesinv; + u_map_, int32_t, SortedVectorHasher> 
geneEc2genesinv; std::sort(geneEc2genes.begin(), geneEc2genes.end()); auto firstNonempty = geneEc2genes.begin(); while (firstNonempty->size() == 0 && firstNonempty != geneEc2genes.end()) { @@ -284,7 +284,7 @@ void bustools_project(Bustools_opt &opt) { BUSData *p = new BUSData[N]; BUSData currRec; // Gene EC --> counts for current barcode/UMI pair - std::unordered_map counts; + u_map_ counts; while (true) { in.read((char*) p, N * sizeof(BUSData)); diff --git a/src/bustools_umicorrect.cpp b/src/bustools_umicorrect.cpp index 6efdadb..a6facbb 100644 --- a/src/bustools_umicorrect.cpp +++ b/src/bustools_umicorrect.cpp @@ -231,13 +231,13 @@ void bustools_umicorrect(const Bustools_opt& opt) { // read and parse the equivelence class files - std::unordered_map, int32_t, SortedVectorHasher> ecmapinv; + u_map_, int32_t, SortedVectorHasher> ecmapinv; std::vector> ecmap; - std::unordered_map txnames; + u_map_ txnames; parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); - std::unordered_map genenames; + u_map_ genenames; parseGenes(opt.count_genes, txnames, genemap, genenames); parseECs(opt.count_ecs, h); ecmap = std::move(h.ecs); diff --git a/src/robin_hood.h b/src/robin_hood.h new file mode 100644 index 0000000..0af031f --- /dev/null +++ b/src/robin_hood.h @@ -0,0 +1,2544 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2021 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
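// A small usage sketch of the swap this commit makes: robin_hood's
// unordered_flat_map exposes the operations bustools relies on (reserve,
// emplace, operator[], find), so replacing std::unordered_map behind the
// u_map_ alias in Common.hpp is mostly a type change at the call sites. The
// variadic alias below and the example transcript ids are assumptions for
// illustration, not the exact definition from the patch.
#include <cstdint>
#include <iostream>
#include <string>
#include "robin_hood.h"  // this header, added as src/robin_hood.h

template <typename K, typename V, typename... Rest>
using u_map_ = robin_hood::unordered_flat_map<K, V, Rest...>;

int main() {
  u_map_<std::string, int32_t> txnames;  // transcript name -> id, as in parseTranscripts
  txnames.reserve(2);
  txnames.emplace("ENST00000000001", 0);
  txnames["ENST00000000002"] = 1;
  auto it = txnames.find("ENST00000000001");
  if (it != txnames.end()) {
    std::cout << it->first << " -> " << it->second << "\n";
  }
  return 0;
}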
+ +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include // only to support hash of smart pointers +#include +#include +#include +#include +#if __cplusplus >= 201703L +# include +#endif + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// #define ROBIN_HOOD_TRACE_ENABLED +#ifdef ROBIN_HOOD_TRACE_ENABLED +# include +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_TRACE(x) +#endif + +// #define ROBIN_HOOD_COUNT_ENABLED +#ifdef ROBIN_HOOD_COUNT_ENABLED +# include +# define ROBIN_HOOD_COUNT(x) ++counts().x; +namespace robin_hood { +struct Counts { + uint64_t shiftUp{}; + uint64_t shiftDown{}; +}; +inline std::ostream& operator<<(std::ostream& os, Counts const& c) { + return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; +} + +static Counts& counts() { + static Counts counts{}; + return counts; +} +} // namespace robin_hood +#else +# define ROBIN_HOOD_COUNT(x) +#endif + +// all non-argument macros should use this facility. See +// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ +#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#endif + +// inline +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) +#endif + +// exceptions +#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 +#endif + +// count leading/trailing bits +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? 
static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) +# else +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) +# endif +#endif + +// fallthrough +#ifndef __has_cpp_attribute // For backwards compatibility +# define __has_cpp_attribute(x) 0 +#endif +#if __has_cpp_attribute(clang::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() +#endif + +// likely/unlikely +#ifdef _MSC_VER +# define ROBIN_HOOD_LIKELY(condition) condition +# define ROBIN_HOOD_UNLIKELY(condition) condition +#else +# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) +# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) +#endif + +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + +// workaround missing "is_trivially_copyable" in g++ < 5.0 +// See https://stackoverflow.com/a/31798726/48181 +#if defined(__GNUC__) && __GNUC__ < 5 +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) 
std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() +#endif + +namespace robin_hood { + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) +# define ROBIN_HOOD_STD std +#else + +// c++11 compatibility layer +namespace ROBIN_HOOD_STD { +template +struct alignment_of + : std::integral_constant::type)> {}; + +template +class integer_sequence { +public: + using value_type = T; + static_assert(std::is_integral::value, "not integral type"); + static constexpr std::size_t size() noexcept { + return sizeof...(Ints); + } +}; +template +using index_sequence = integer_sequence; + +namespace detail_ { +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); + + template + struct IntSeqCombiner; + + template + struct IntSeqCombiner, integer_sequence> { + using TResult = integer_sequence; + }; + + using TResult = + typename IntSeqCombiner::TResult, + typename IntSeqImpl::TResult>::TResult; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; +} // namespace detail_ + +template +using make_integer_sequence = typename detail_::IntSeqImpl::TResult; + +template +using make_index_sequence = make_integer_sequence; + +template +using index_sequence_for = make_index_sequence; + +} // namespace ROBIN_HOOD_STD + +#endif + +namespace detail { + +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; +#else +using SizeT = uint32_t; +#endif + +template +T rotr(T x, unsigned k) { + return (x >> k) | (x << (8U * sizeof(T) - k)); +} + +// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to +// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with +// care! +template +inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { + return reinterpret_cast(ptr); +} + +template +inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { + return reinterpret_cast(ptr); +} + +// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other +// inlinings more difficult. Throws are also generally the slow path. +template +[[noreturn]] ROBIN_HOOD(NOINLINE) +#if ROBIN_HOOD(HAS_EXCEPTIONS) + void doThrow(Args&&... args) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) + throw E(std::forward(args)...); +} +#else + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + abort(); +} +#endif + +template +T* assertNotNull(T* t, Args&&... 
args) { + if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { + doThrow(std::forward(args)...); + } + return t; +} + +template +inline T unaligned_load(void const* ptr) noexcept { + // using memcpy so we don't get into unaligned load problems. + // compiler should optimize this very well anyways. + T t; + std::memcpy(&t, ptr, sizeof(T)); + return t; +} + +// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, +// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a +// pointer. +template +class BulkPoolAllocator { +public: + BulkPoolAllocator() noexcept = default; + + // does not copy anything, just creates a new allocator. + BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept + : mHead(nullptr) + , mListForFree(nullptr) {} + + BulkPoolAllocator(BulkPoolAllocator&& o) noexcept + : mHead(o.mHead) + , mListForFree(o.mListForFree) { + o.mListForFree = nullptr; + o.mHead = nullptr; + } + + BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { + reset(); + mHead = o.mHead; + mListForFree = o.mListForFree; + o.mListForFree = nullptr; + o.mHead = nullptr; + return *this; + } + + BulkPoolAllocator& + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { + // does not do anything + return *this; + } + + ~BulkPoolAllocator() noexcept { + reset(); + } + + // Deallocates all allocated memory. + void reset() noexcept { + while (mListForFree) { + T* tmp = *mListForFree; + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); + mListForFree = reinterpret_cast_no_cast_align_warning(tmp); + } + mHead = nullptr; + } + + // allocates, but does NOT initialize. Use in-place new constructor, e.g. + // T* obj = pool.allocate(); + // ::new (static_cast(obj)) T(); + T* allocate() { + T* tmp = mHead; + if (!tmp) { + tmp = performAllocation(); + } + + mHead = *reinterpret_cast_no_cast_align_warning(tmp); + return tmp; + } + + // does not actually deallocate but puts it in store. + // make sure you have already called the destructor! e.g. with + // obj->~T(); + // pool.deallocate(obj); + void deallocate(T* obj) noexcept { + *reinterpret_cast_no_cast_align_warning(obj) = mHead; + mHead = obj; + } + + // Adds an already allocated block of memory to the allocator. This allocator is from now on + // responsible for freeing the data (with free()). If the provided data is not large enough to + // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. + void addOrFree(void* ptr, const size_t numBytes) noexcept { + // calculate number of available elements in ptr + if (numBytes < ALIGNMENT + ALIGNED_SIZE) { + // not enough data for at least one element. Free and return. + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } else { + ROBIN_HOOD_LOG("add to buffer") + add(ptr, numBytes); + } + } + + void swap(BulkPoolAllocator& other) noexcept { + using std::swap; + swap(mHead, other.mHead); + swap(mListForFree, other.mListForFree); + } + +private: + // iterates the list of allocated memory to calculate how many to alloc next. + // Recalculating this each time saves us a size_t member. + // This ignores the fact that memory blocks might have been added manually with addOrFree. In + // practice, this should not matter much. 
+ ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { + auto tmp = mListForFree; + size_t numAllocs = MinNumAllocs; + + while (numAllocs * 2 <= MaxNumAllocs && tmp) { + auto x = reinterpret_cast(tmp); + tmp = *x; + numAllocs *= 2; + } + + return numAllocs; + } + + // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). + void add(void* ptr, const size_t numBytes) noexcept { + const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; + + auto data = reinterpret_cast(ptr); + + // link free list + auto x = reinterpret_cast(data); + *x = mListForFree; + mListForFree = data; + + // create linked list for newly allocated data + auto* const headT = + reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); + + auto* const head = reinterpret_cast(headT); + + // Visual Studio compiler automatically unrolls this loop, which is pretty cool + for (size_t i = 0; i < numElements; ++i) { + *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = + head + (i + 1) * ALIGNED_SIZE; + } + + // last one points to 0 + *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = + mHead; + mHead = headT; + } + + // Called when no memory is available (mHead == 0). + // Don't inline this slow path. + ROBIN_HOOD(NOINLINE) T* performAllocation() { + size_t const numElementsToAlloc = calcNumElementsToAlloc(); + + // alloc new memory: [prev |T, T, ... T] + size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); + return mHead; + } + + // enforce byte alignment of the T's +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) + static constexpr size_t ALIGNMENT = + (std::max)(std::alignment_of::value, std::alignment_of::value); +#else + static const size_t ALIGNMENT = + (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) + ? ROBIN_HOOD_STD::alignment_of::value + : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround +#endif + + static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; + + static_assert(MinNumAllocs >= 1, "MinNumAllocs"); + static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); + static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); + static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); + static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); + + T* mHead{nullptr}; + T** mListForFree{nullptr}; +}; + +template +struct NodeAllocator; + +// dummy allocator that does nothing +template +struct NodeAllocator { + + // we are not using the data, so just free it. + void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making +// my own here. 
+namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) +using std::swap; +template +struct nothrow { + static const bool value = noexcept(swap(std::declval(), std::declval())); +}; +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif +} // namespace swappable + +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = T1; + using second_type = T2; + + template ::value && + std::is_default_constructible::value>::type> + constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) + : first() + , second() {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair const& o) noexcept( + noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) + : first(o.first) + , second(o.second) {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(o.first)) + , second(std::move(o.second)) {} + + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(a)) + , second(std::move(b)) {} + + template + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) + : first(std::forward(a)) + , second(std::forward(b)) {} + + template + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) + : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()) { + } + + // constructor called from the std::piecewise_construct_t ctor + template + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) + : first(std::forward(std::get(a))...) + , second(std::forward(std::get(b))...) { + // make visual studio compiler happy about warning about unused a & b. + // Visual studio's pair implementation disables warning 4100. 
+ (void)a; + (void)b; + } + + void swap(pair& o) noexcept((detail::swappable::nothrow::value) && + (detail::swappable::nothrow::value)) { + using std::swap; + swap(first, o.first); + swap(second, o.second); + } + + T1 first; // NOLINT(misc-non-private-member-variables-in-classes) + T2 second; // NOLINT(misc-non-private-member-variables-in-classes) +}; + +template +inline void swap(pair& a, pair& b) noexcept( + noexcept(std::declval&>().swap(std::declval&>()))) { + a.swap(b); +} + +template +inline constexpr bool operator==(pair const& x, pair const& y) { + return (x.first == y.first) && (x.second == y.second); +} +template +inline constexpr bool operator!=(pair const& x, pair const& y) { + return !(x == y); +} +template +inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( + std::declval() < std::declval()) && noexcept(std::declval() < + std::declval())) { + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +inline constexpr bool operator>(pair const& x, pair const& y) { + return y < x; +} +template +inline constexpr bool operator<=(pair const& x, pair const& y) { + return !(x > y); +} +template +inline constexpr bool operator>=(pair const& x, pair const& y) { + return !(x < y); +} + +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { + static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + static constexpr uint64_t seed = UINT64_C(0xe17a1465); + static constexpr unsigned int r = 47; + + auto const* const data64 = static_cast(ptr); + uint64_t h = seed ^ (len * m); + + size_t const n_blocks = len / 8; + for (size_t i = 0; i < n_blocks; ++i) { + auto k = detail::unaligned_load(data64 + i); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + auto const* const data8 = reinterpret_cast(data64 + n_blocks); + switch (len & 7U) { + case 7: + h ^= static_cast(data8[6]) << 48U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 6: + h ^= static_cast(data8[5]) << 40U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 5: + h ^= static_cast(data8[4]) << 32U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 4: + h ^= static_cast(data8[3]) << 24U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 3: + h ^= static_cast(data8[2]) << 16U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 2: + h ^= static_cast(data8[1]) << 8U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 1: + h ^= static_cast(data8[0]); + h *= m; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + default: + break; + } + + h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; + return static_cast(h); +} + +inline size_t hash_int(uint64_t x) noexcept { + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); +} + +// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
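
Two practical takeaways from the block above, before the std::hash wrapper that follows: the custom pair really is trivially copyable (which is the reason it exists), and hash_bytes()/hash_int() are plain free functions that can back a hasher for user-defined keys. A small sketch, with Coord, CoordHash and CoordEq as illustrative assumptions rather than anything from the header:

#include "robin_hood.h"
#include <cstddef>
#include <cstdint>
#include <type_traits>

// The custom pair is memcpy-safe, unlike std::pair on typical standard libraries.
static_assert(std::is_trivially_copyable<robin_hood::pair<int, int>>::value,
              "robin_hood::pair<int, int> can be relocated with std::memcpy");

struct Coord {                  // hypothetical key type
    int32_t x;
    int32_t y;
};

struct CoordHash {
    size_t operator()(Coord const& c) const noexcept {
        // Pack both fields into 64 bits and reuse hash_int(), the same
        // murmurhash3-style mixer the map applies to integral keys.
        uint64_t v = (static_cast<uint64_t>(static_cast<uint32_t>(c.x)) << 32U) |
                     static_cast<uint32_t>(c.y);
        return robin_hood::hash_int(v);
    }
};

struct CoordEq {
    bool operator()(Coord const& a, Coord const& b) const noexcept {
        return a.x == b.x && a.y == b.y;
    }
};

// e.g. robin_hood::unordered_flat_map<Coord, double, CoordHash, CoordEq>
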
+template +struct hash : public std::hash { + size_t operator()(T const& obj) const + noexcept(noexcept(std::declval>().operator()(std::declval()))) { + // call base hash + auto result = std::hash::operator()(obj); + // return mixed of that, to be save against identity has + return hash_int(static_cast(result)); + } +}; + +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); + } +}; + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + +template +struct hash { + size_t operator()(T* ptr) const noexcept { + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); + } +}; + +#define ROBIN_HOOD_HASH_INT(T) \ + template <> \ + struct hash { \ + size_t operator()(T const& obj) const noexcept { \ + return hash_int(static_cast(obj)); \ + } \ + } + +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif +// see https://en.cppreference.com/w/cpp/utility/hash +ROBIN_HOOD_HASH_INT(bool); +ROBIN_HOOD_HASH_INT(char); +ROBIN_HOOD_HASH_INT(signed char); +ROBIN_HOOD_HASH_INT(unsigned char); +ROBIN_HOOD_HASH_INT(char16_t); +ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) +ROBIN_HOOD_HASH_INT(wchar_t); +#endif +ROBIN_HOOD_HASH_INT(short); +ROBIN_HOOD_HASH_INT(unsigned short); +ROBIN_HOOD_HASH_INT(int); +ROBIN_HOOD_HASH_INT(unsigned int); +ROBIN_HOOD_HASH_INT(long); +ROBIN_HOOD_HASH_INT(long long); +ROBIN_HOOD_HASH_INT(unsigned long); +ROBIN_HOOD_HASH_INT(unsigned long long); +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif +namespace detail { + +template +struct void_type { + using type = void; +}; + +template +struct has_is_transparent : public std::false_type {}; + +template +struct has_is_transparent::type> + : public std::true_type {}; + +// using wrapper classes for hash and key_equal prevents the diamond problem when the same type +// is used. see https://stackoverflow.com/a/28771920/48181 +template +struct WrapHash : public T { + WrapHash() = default; + explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +template +struct WrapKeyEqual : public T { + WrapKeyEqual() = default; + explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +// A highly optimized hashmap implementation, using the Robin Hood algorithm. +// +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but +// be about 2x faster in most cases and require much less allocations. +// +// This implementation uses the following memory layout: +// +// [Node, Node, ... Node | info, info, ... infoSentinel ] +// +// * Node: either a DataNode that directly has the std::pair as member, +// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class Table + : public WrapHash, + public WrapKeyEqual, + detail::NodeAllocator< + typename std::conditional< + std::is_void::value, Key, + robin_hood::pair::type, T>>::type, + 4, 16384, IsFlat> { +public: + static constexpr bool is_flat = IsFlat; + static constexpr bool is_map = !std::is_void::value; + static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; + + using key_type = Key; + using mapped_type = T; + using value_type = typename std::conditional< + is_set, Key, + robin_hood::pair::type, T>>::type; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = Table; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + using WHash = WrapHash; + using WKeyEqual = WrapKeyEqual; + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr uint32_t InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = uint32_t; + + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode final { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( + noexcept(value_type(std::forward(args)...))) + : mData(std::forward(args)...) 
{} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( + std::is_nothrow_move_constructible::value) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} + void destroyDoNotDeallocate() noexcept {} + + value_type const* operator->() const noexcept { + return &mData; + } + value_type* operator->() noexcept { + return &mData; + } + + const value_type& operator*() const noexcept { + return mData; + } + + value_type& operator*() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData.second; + } + + void swap(DataNode& o) noexcept( + noexcept(std::declval().swap(std::declval()))) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept + : mData(std::move(n.mData)) {} + + void destroy(M& map) noexcept { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() noexcept { + mData->~value_type(); + } + + value_type const* operator->() const noexcept { + return mData; + } + + value_type* operator->() noexcept { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData->second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData->second; + } + + void swap(DataNode& o) noexcept { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) + ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { + return n.getFirst(); + } + + // in case we have void mapped_type, we are not using a pair, thus we just route k through. + // No need to disable this because it's just not used if not applicable. 
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { + return k; + } + + // in case we have non-void mapped_type, we have a standard robin_hood::pair + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, key_type const&>::type + getFirstConst(value_type const& vt) const noexcept { + return vt.first; + } + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); + auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); + std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& s, M& t) const { + auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); + std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); + + for (size_t i = 0; i < numElementsWithBuffer; ++i) { + if (t.mInfo[i]) { + ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + struct fast_forward_tag {}; + + // generic iterator for both const_iterator and iterator. + template + // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() = default; + + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. + + // Conversion constructor from iterator to const_iterator. 
+ template ::type> + // NOLINTNEXTLINE(hicpp-explicit-conversions) + Iter(Iter const& other) noexcept + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr, + fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) { + fastForward(); + } + + template ::type> + Iter& operator=(Iter const& other) noexcept { + mKeyVals = other.mKeyVals; + mInfo = other.mInfo; + return *this; + } + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() noexcept { + mInfo++; + mKeyVals++; + fastForward(); + return *this; + } + + Iter operator++(int) noexcept { + Iter tmp = *this; + ++(*this); + return tmp; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const noexcept { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const noexcept { + return mKeyVals != o.mKeyVals; + } + + private: + // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. + void fastForward() noexcept { + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } +#else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; +#endif + } + + friend class Table; + NodePtr mKeyVals{nullptr}; + uint8_t const* mInfo{nullptr}; + }; + + //////////////////////////////////////////////////////////////////// + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const noexcept { + *idx = *idx + 1; + *info += mInfoInc; + } + + void nextWhileLess(InfoType* info, size_t* idx) const noexcept { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. 
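
The keyToIdx() logic above is the heart of the addressing scheme: one extra multiplicative mixing step, then the low 5 bits of the hash feed the info byte and the remaining bits select the bucket. Below is a standalone arithmetic sketch of that split, assuming the initial table configuration (mInfoHashShift == 0, mInfoInc == 32) with the constants written out explicitly; it is not the class's private method.

#include <cstddef>
#include <cstdint>

// Mirrors keyToIdx() for the initial table state.
void split_hash(uint64_t h, size_t mask, size_t* idx, uint32_t* info) {
    h *= UINT64_C(0xc4ceb9fe1a85ec53); // initial mHashMultiplier
    h ^= h >> 33U;                     // extra mixing, guards against weak user hashes

    const uint32_t infoInc  = 1U << 5U;     // InitialInfoInc
    const uint32_t infoMask = infoInc - 1U; // InfoMask: the low 5 hash bits

    // info byte: distance-from-home in steps of infoInc, plus 5 hash bits
    // that let lookups skip most full key comparisons.
    *info = infoInc + static_cast<uint32_t>(h & infoMask);
    // bucket index: the remaining hash bits, folded into the 2^n-sized array.
    *idx = (static_cast<size_t>(h) >> 5U) & mask;
}
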
+ void + shiftUp(size_t startIdx, + size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { + auto idx = startIdx; + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); + while (--idx != insertion_idx) { + mKeyVals[idx] = std::move(mKeyVals[idx - 1]); + } + + idx = startIdx; + while (idx != insertion_idx) { + ROBIN_HOOD_COUNT(shiftUp) + mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); + if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + --idx; + } + } + + void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { + // until we find one that is either empty or has zero offset. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + while (mInfo[idx + 1] >= 2 * mInfoInc) { + ROBIN_HOOD_COUNT(shiftDown) + mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[idx + 1]); + ++idx; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + ROBIN_HOOD(NODISCARD) + size_t findIdx(Other const& key) const { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask == 0 ? 0 + : static_cast(std::distance( + mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); + } + + void cloneData(const Table& o) { + Cloner()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx{}; + InfoType info{}; + keyToIdx(keyval.getFirst(), &idx, &info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = idx + 1; + info += mInfoInc; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + } + + template + Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(first, last); + } + + Table(std::initializer_list initlist, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(initlist.begin(), initlist.end()); + } + + Table(Table&& o) noexcept + : WHash(std::move(static_cast(o))) + , WKeyEqual(std::move(static_cast(o))) + , DataPool(std::move(static_cast(o))) { + ROBIN_HOOD_TRACE(this) + if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + // set other's mask to 0 so its destructor won't do anything + o.init(); + } + } + + Table& operator=(Table&& o) noexcept { + ROBIN_HOOD_TRACE(this) + if (&o != this) { + if (o.mMask) { + // only move stuff if the other map actually has some data + destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + WHash::operator=(std::move(static_cast(o))); + WKeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + + o.init(); + + } else { + // nothing in the other map => just clear us. + clear(); + } + } + return *this; + } + + Table(const Table& o) + : WHash(static_cast(o)) + , WKeyEqual(static_cast(o)) + , DataPool(static_cast(o)) { + ROBIN_HOOD_TRACE(this) + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. 
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + Table& operator=(Table const& o) { + ROBIN_HOOD_TRACE(this) + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + init(); + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + // sentinel is set in cloneData + } + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(Table& o) { + ROBIN_HOOD_TRACE(this) + using std::swap; + swap(o, *this); + } + + // Clears all data, without resizing. + void clear() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + // clear everything, then set the sentinel again + uint8_t const z = 0; + std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~Table() { + ROBIN_HOOD_TRACE(this) + destroy(); + } + + // Checks if both tables contain the same entries. Order is irrelevant. 
+ bool operator==(const Table& other) const { + ROBIN_HOOD_TRACE(this) + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + if (!has(otherEntry)) { + return false; + } + } + + return true; + } + + bool operator!=(const Table& other) const { + ROBIN_HOOD_TRACE(this) + return !operator==(other); + } + + template + typename std::enable_if::value, Q&>::type operator[](const key_type& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + typename std::enable_if::value, Q&>::type operator[](key_type&& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + + template + std::pair emplace(Args&&... args) { + ROBIN_HOOD_TRACE(this) + Node n{*this, std::forward(args)...}; + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; + } + + std::pair insert(const value_type& keyval) { + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; + } + + std::pair insert(value_type&& keyval) { + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + return 1U == count(key); + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q&>::type at(key_type const& key) { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + // Returns a reference to the value found for key. 
+ // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q const&>::type at(key_type const& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + return end(); + } + return iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + const_iterator begin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cbegin(); + } + const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + if (empty()) { + return cend(); + } + return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + + iterator end() { + ROBIN_HOOD_TRACE(this) + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + const_iterator end() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cend(); + } + const_iterator cend() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + ROBIN_HOOD_TRACE(this) + // its safe to perform const cast here + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + ROBIN_HOOD_TRACE(this) + // we assume that pos always points to a valid entry, and not end(). 
+ auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // exactly the same as reserve(c). + void rehash(size_t c) { + // forces a reserve + reserve(c, true); + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. + void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } + } + + size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return mNumElements; + } + + size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(-1); + } + + ROBIN_HOOD(NODISCARD) bool empty() const noexcept { + ROBIN_HOOD_TRACE(this) + return 0 == mNumElements; + } + + float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return MaxLoadFactor100 / 100.0F; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(size()) / static_cast(mMask + 1); + } + + ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { + ROBIN_HOOD_TRACE(this) + return mMask; + } + + ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { + if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { + return maxElements * MaxLoadFactor100 / 100; + } + + // we might be a bit inprecise, but since maxElements is quite large that doesn't matter + return (maxElements / 100) * MaxLoadFactor100; + } + + ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { + // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load + // 64bit types. 
+ return numElements + sizeof(uint64_t); + } + + ROBIN_HOOD(NODISCARD) + size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { + auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); + return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); + } + + // calculation only allowed for 2^n values + ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { +#if ROBIN_HOOD(BITNESS) == 64 + return numElements * sizeof(Node) + calcNumBytesInfo(numElements); +#else + // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. + auto const ne = static_cast(numElements); + auto const s = static_cast(sizeof(Node)); + auto const infos = static_cast(calcNumBytesInfo(numElements)); + + auto const total64 = ne * s + infos; + auto const total = static_cast(total64); + + if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { + throwOverflowError(); + } + return total; +#endif + } + +private: + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + auto it = find(e.first); + return it != end() && it->second == e.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + return find(e) != end(); + } + + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + + // reserves space for at least the specified number of elements. + // only works if numBuckets if power of two + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) + + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + // resize operation: move stuff + initData(numBuckets); + if (oldMaxElementsWithBuffer > 1) { + for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { + if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. 
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } + } + } + + ROBIN_HOOD(NOINLINE) void throwOverflowError() const { +#if ROBIN_HOOD(HAS_EXCEPTIONS) + throw std::overflow_error("robin_hood::map overflow"); +#else + abort(); +#endif + } + + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { + mNumElements = 0; + mMask = max_elements - 1; + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); + + // malloc & zero mInfo. Faster than calloc everything. + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); + + // set sentinel + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; + + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. 
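
All of the insertion entry points above (operator[], emplace, try_emplace, insert_or_assign) funnel into the helper defined next, and they differ only in what happens when the key already exists. A short behavioral sketch of those public calls, using only members shown earlier:

#include "robin_hood.h"
#include <string>

int main() {
    robin_hood::unordered_flat_map<std::string, std::string> m;

    m.try_emplace("k", "first");      // key absent: inserted
    m.try_emplace("k", "second");     // key present: left untouched, m["k"] == "first"

    m.insert_or_assign("k", "third"); // key present: mapped value overwritten

    auto res = m.emplace("k", "fourth");
    // emplace also refuses to overwrite: res.second == false, m["k"] == "third"
    return res.second ? 1 : 0;
}
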
+ template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + nextWhileLess(&info, &idx); + + // while we potentially have a match + while (info == mInfo[idx]) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + // key already exists, do NOT insert. + // see http://en.cppreference.com/w/cpp/container/unordered_map/insert + return std::make_pair(idx, InsertionState::key_found); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + continue; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = info; + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + if (idx != insertion_idx) { + shiftUp(idx, insertion_idx); + } + // put at empty spot + mInfo[insertion_idx] = static_cast(insertion_info); + ++mNumElements; + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); + } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + + bool try_increase_info() { + ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements + << ", maxNumElementsAllowed=" + << calcMaxNumElementsAllowed(mMask + 1)) + if (mInfoInc <= 2) { + // need to be > 2 so that shift works (otherwise undefined behavior!) + return false; + } + // we got space left, try to make info smaller + mInfoInc = static_cast(mInfoInc >> 1U); + + // remove one bit of the hash, leaving more space for the distance info. + // This is extremely fast because we can operate on 8 bytes at once. + ++mInfoHashShift; + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + for (size_t i = 0; i < numElementsWithBuffer; i += 8) { + auto val = unaligned_load(mInfo + i); + val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); + std::memcpy(mInfo + i, &val, sizeof(val)); + } + // update sentinel, which might have been cleared out! + mInfo[numElementsWithBuffer] = 1; + + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + return true; + } + + // True if resize was possible, false otherwise + bool increase_size() { + // nothing allocated yet? just allocate InitialNumElements + if (0 == mMask) { + initData(InitialNumElements); + return true; + } + + auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + if (mNumElements < maxNumElementsAllowed && try_increase_info()) { + return true; + } + + ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" + << maxNumElementsAllowed << ", load=" + << (static_cast(mNumElements) * 100.0 / + (static_cast(mMask) + 1))) + + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case + // we have to rehash a few times + nextHashMultiplier(); + rehashPowerOfTwo(mMask + 1, true); + } else { + // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
+ rehashPowerOfTwo((mMask + 1) * 2, false); + } + return true; + } + + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); + } + + void destroy() { + if (0 == mMask) { + // don't deallocate! + return; + } + + Destroyer::value>{} + .nodesDoNotDeallocate(*this); + + // This protection against not deleting mMask shouldn't be needed as it's sufficiently + // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise + // reports a compile error: attempt to free a non-heap object 'fm' + // [-Werror=free-nonheap-object] + if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + } + + void init() noexcept { + mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); + mInfo = reinterpret_cast(&mMask); + mNumElements = 0; + mMask = 0; + mMaxNumElementsAllowed = 0; + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // members are sorted so no padding occurs + uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 + // 16 byte 56 if NodeAllocator +}; + +} // namespace detail + +// map + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_flat_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_node_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_map = + detail::Table) <= sizeof(size_t) * 6 && + std::is_nothrow_move_constructible>::value && + std::is_nothrow_move_assignable>::value, + MaxLoadFactor100, Key, T, Hash, KeyEqual>; + +// set + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_flat_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_node_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_set = detail::Table::value && + std::is_nothrow_move_assignable::value, + MaxLoadFactor100, Key, void, Hash, KeyEqual>; + +} // namespace robin_hood + +#endif From 5ca38bfc50bb9bbafa3c5d30d2969fd115349ccf Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:37:29 -0800 Subject: [PATCH 06/49] Change typedef to macro for map --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index 940a4a8..98b0395 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -13,7 +13,7 @@ #define BUSTOOLS_VERSION "0.42.0" -typedef robin_hood::unordered_flat_map u_map_; +#define u_map_ robin_hood::unordered_flat_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, From 2a74dc3e7a6bf4092b0a812b3fb5f09c446d831f Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 15:50:17 -0800 Subject: [PATCH 07/49] fix bustools inspect --- src/bustools_inspect.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/bustools_inspect.cpp b/src/bustools_inspect.cpp index 2808474..25694cf 100644 --- a/src/bustools_inspect.cpp +++ b/src/bustools_inspect.cpp @@ -100,7 +100,7 @@ void bustools_inspect(Bustools_opt &opt) { int64_t gt_records = 0; /* Frequency of number of targets per set, with multiplicity. */ - u_map_ freq_targetsPerSet; + std::unordered_map freq_targetsPerSet; /* Frequency of targets (for Good-Toulmin). */ std::vector freq_targets(numTargets, 0); @@ -273,7 +273,7 @@ void bustools_inspect(Bustools_opt &opt) { // Good-Toulmin for number of targets // Also number of targets detected uint64_t targetsDetected = 0; - u_map_ freq_freq_targets; + std::unordered_map freq_freq_targets; for (const auto &elt : freq_targets) { if (elt) { ++targetsDetected; From 41f631e7864816041476c549b0eeacae38895fb7 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:02:55 -0800 Subject: [PATCH 08/49] fix mash --- src/bustools_mash.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bustools_mash.cpp b/src/bustools_mash.cpp index 8faa65d..1df9702 100644 --- a/src/bustools_mash.cpp +++ b/src/bustools_mash.cpp @@ -105,8 +105,10 @@ void bustools_mash(const Bustools_opt &opt) for (int32_t i = 0; i < tid; i++) { - oh.ecs.push_back({i}); - ecmapinv.insert({{i}, i}); + std::vector tmp_vec; + tmp_vec.push_back(i); + oh.ecs.push_back(tmp_vec); + ecmapinv.insert({tmp_vec, i}); } std::vector> eids_per_file; From 07eeb6686c57f7a8e4ac9a0e24dd5827f404cb8a Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:06:17 -0800 Subject: [PATCH 09/49] fix undefined reference --- src/Common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 717ee24..5a81674 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -212,7 +212,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< } -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, std::unordered_map, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results std::vector u; // final list of transcripts From bf16198a9aafba5e93a3bef1d229723e83a8ba44 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 16:34:44 -0800 Subject: [PATCH 10/49] better hashing --- src/Common.hpp | 24 +++++- src/hash.cpp | 194 +++++++++++++++++++++++++++++++++++++++++++++++++ src/hash.hpp | 22 ++++++ 3 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 src/hash.cpp create mode 100644 src/hash.hpp diff --git a/src/Common.hpp b/src/Common.hpp index 98b0395..cab0932 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -10,6 +10,8 @@ #include #include #include "robin_hood.h" +#include 
"roaring.h" +#include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" @@ -160,14 +162,32 @@ struct SortedVectorHasher int i = 0; for (auto x : v) { - uint64_t t = std::hash{}(x); + uint64_t t; + MurmurHash3_x64_64(&x,sizeof(x), 0,&t); t = (x >> i) | (x << (64 - i)); r = r ^ t; - i = (i + 1) % 64; + i = (i+1)&63; } return r; } }; + +struct RoaringHasher { + size_t operator()(const Roaring& rr) const { + uint64_t r = 0; + int i=0; + for (auto x : rr) { + uint64_t t; + MurmurHash3_x64_64(&x, sizeof(x), 0, &t); + t = (x>>i) | (x<<(64-i)); + r ^= t; + i = (i+1)&63; // (i+1)%64 + } + return r; + } +}; +typedef u_map_ EcMapInv; + std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); diff --git a/src/hash.cpp b/src/hash.cpp new file mode 100644 index 0000000..b2d18e1 --- /dev/null +++ b/src/hash.cpp @@ -0,0 +1,194 @@ +#include +#include +#include "hash.hpp" + +uint64_t inline _rotl64(uint64_t value, int8_t amount) { + return ((value) << (amount)) | ((value) >> (64 - (amount))); +} + +uint32_t SuperFastHash (const char *data, int len) { + uint32_t hash = len, tmp; + int rem; + + if (len <= 0 || data == NULL) { return 0; } + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (; len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + + + + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +inline uint64_t getblock ( const uint64_t *p, int i ) { + return p[i]; +} + +//---------- +// Block mix - combine the key bits with the hash bits and scramble everything + +inline void bmix64 ( uint64_t& h1, uint64_t& h2, uint64_t& k1, uint64_t& k2, uint64_t& c1, uint64_t& c2 ) { + k1 *= c1; + k1 = _rotl64(k1,23); + k1 *= c2; + h1 ^= k1; + h1 += h2; + + h2 = _rotl64(h2,41); + + k2 *= c2; + k2 = _rotl64(k2,23); + k2 *= c1; + h2 ^= k2; + h2 += h1; + + h1 = h1*3+0x52dce729; + h2 = h2*3+0x38495ab5; + + c1 = c1*5+0x7b7d159c; + c2 = c2*5+0x6bce6396; +} + +//---------- +// Finalization mix - avalanches all bits to within 0.05% bias + +inline uint64_t fmix64 ( uint64_t k ) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccd; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53; + k ^= k >> 33; + + return k; +} + +void MurmurHash3_x64_128 ( const void *key, const int len, const uint32_t seed, void *out ) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; + + uint64_t h1 = 0x9368e53c2f6af274 ^ seed; + uint64_t h2 = 0x586dcd208f7cd3fd ^ seed; + + uint64_t c1 = 0x87c37b91114253d5; + uint64_t c2 = 0x4cf5ad432745937f; + + //---------- + // body + + const uint64_t *blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock(blocks,i*2+0); + uint64_t k2 = getblock(blocks,i*2+1); + + 
bmix64(h1,h2,k1,k2,c1,c2); + } + + //---------- + // tail + + const uint8_t *tail = (const uint8_t *)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) { + case 15: k2 ^= uint64_t(tail[14]) << 48; + case 14: k2 ^= uint64_t(tail[13]) << 40; + case 13: k2 ^= uint64_t(tail[12]) << 32; + case 12: k2 ^= uint64_t(tail[11]) << 24; + case 11: k2 ^= uint64_t(tail[10]) << 16; + case 10: k2 ^= uint64_t(tail[ 9]) << 8; + case 9: k2 ^= uint64_t(tail[ 8]) << 0; + + case 8: k1 ^= uint64_t(tail[ 7]) << 56; + case 7: k1 ^= uint64_t(tail[ 6]) << 48; + case 6: k1 ^= uint64_t(tail[ 5]) << 40; + case 5: k1 ^= uint64_t(tail[ 4]) << 32; + case 4: k1 ^= uint64_t(tail[ 3]) << 24; + case 3: k1 ^= uint64_t(tail[ 2]) << 16; + case 2: k1 ^= uint64_t(tail[ 1]) << 8; + case 1: k1 ^= uint64_t(tail[ 0]) << 0; + bmix64(h1,h2,k1,k2,c1,c2); + }; + + //---------- + // finalization + + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t *)out)[0] = h1; + ((uint64_t *)out)[1] = h2; +} + +//----------------------------------------------------------------------------- +// If we need a smaller hash value, it's faster to just use a portion of the +// 128-bit hash + +void MurmurHash3_x64_32 ( const void *key, int len, uint32_t seed, void *out ) { + uint32_t temp[4]; + + MurmurHash3_x64_128(key,len,seed,temp); + + *(uint32_t *)out = temp[0]; +} + +//---------- + +void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out ) { + uint64_t temp[2]; + + MurmurHash3_x64_128(key,len,seed,temp); + + *(uint64_t *)out = temp[0]; +} + +//----------------------------------------------------------------------------- + diff --git a/src/hash.hpp b/src/hash.hpp new file mode 100644 index 0000000..ab2c0d1 --- /dev/null +++ b/src/hash.hpp @@ -0,0 +1,22 @@ +#ifndef HASH_H +#define HASH_H + +#include /* Replace with if appropriate */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif + +uint32_t SuperFastHash (const char *data, int len); + +//void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out ); + +#endif + From 7dd1eb82105a364494946c7043e21fc4aaff051e Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 17:00:14 -0800 Subject: [PATCH 11/49] fix roaring --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index cab0932..a0985c3 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -10,7 +10,7 @@ #include #include #include "robin_hood.h" -#include "roaring.h" +#include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" From 19b3674900d9ee283877b243c391f42c62185168 Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 21:01:24 -0800 Subject: [PATCH 12/49] ecmapinv: attempt to use bitmap --- src/Common.cpp | 86 ++++++++++++++++++------------------------ src/Common.hpp | 4 +- src/bustools_count.cpp | 8 ++-- 3 files changed, 42 insertions(+), 56 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 5a81674..9a95e54 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const 
std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -86,58 +86,35 @@ int32_t intersect_ecs(const std::vector &ecs, std::vector &u, return ecs[0]; // no work } - u.resize(0); - auto &v = ecmap[ecs[0]]; // copy - for (size_t i = 0; i< v.size(); i++) { - u.push_back(v[i]); - } - + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); + u = Roaring(ecmap[ecs[0]].size(), data); + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - const auto &v = ecmap[ecs[i]]; - - int j = 0; - int k = 0; - int l = 0; - int n = u.size(); - int m = v.size(); - // u and v are sorted, j,k,l = 0 - while (j < n && l < m) { - // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m - // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted - if (u[j] < v[l]) { - j++; - } else if (u[j] > v[l]) { - l++; - } else { - // match - if (k < j) { - std::swap(u[k], u[j]); - } - k++; - j++; - l++; - } - } - if (k < n) { - u.resize(k); - } + data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); + u &= Roaring(ecmap[ecs[i]].size(), data); } - if (u.empty()) { + if (u.isEmpty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -212,10 +189,10 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< } -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - std::vector u; // final list of transcripts + Roaring u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -245,11 +222,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.push_back(t); + u.add(t); } } - std::sort(u.begin(), u.end()); - u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -260,9 +235,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] 
u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } @@ -291,14 +272,13 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.push_back(t); + u.add(t); } } - if (u.empty()) { + if (u.isEmpty()) { return -1; } - std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -307,9 +287,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index a0985c3..778cc12 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -191,10 +191,10 @@ typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); -int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, u_map_, int32_t, SortedVectorHasher> &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); +int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); void create_ec2genes(const std::vector> &ecmap, const std::vector &genemap, std::vector> &ec2gene); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector> &ecmap, const std::vector& tx_split); COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index b4a367f..b5e50d5 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -18,7 +18,7 @@ void bustools_count(Bustools_opt &opt) { // read and parse the equivalence class files - u_map_, int32_t, SortedVectorHasher> ecmapinv; + EcMapInv ecmapinv; std::vector> ecmap; u_map_ txnames; @@ -34,7 +34,8 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); + ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -85,8 +86,7 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - std::vector u; - u.reserve(100); + Roaring u; std::vector column_v; std::vector>> column_vp; // gene, {count, 
matrix type} if (!opt.count_collapse) { From 097f99181c0df5db2a5b7ffb3d99043a9d3f76fb Mon Sep 17 00:00:00 2001 From: Yenaled Date: Mon, 23 Jan 2023 21:04:02 -0800 Subject: [PATCH 13/49] fix --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index 778cc12..4cc8596 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -191,7 +191,7 @@ typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); From 67ebc4632596ab21cd13c16619ec520273eb3858 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 6 Feb 2023 03:44:12 -0800 Subject: [PATCH 14/49] added -s count option to main --- src/bustools_main.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 384dc2f..991c099 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -328,7 +328,7 @@ void parse_ProgramOptions_capture(int argc, char **argv, Bustools_opt &opt) void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:g:e:t:md:"; + const char *opt_string = "o:g:e:t:md:s:"; int gene_flag = 0; int umigene_flag = 0; int em_flag = 0; @@ -348,6 +348,7 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"hist", no_argument, &hist_flag, 1}, {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, + {"split", required_argument, 0, 's'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -375,6 +376,9 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) case 'm': opt.count_gene_multimapping = true; break; + case 's': + opt.count_split = optarg; + break; default: break; } From 3f38e15e0393906e191d9ce7659fb849a39a833a Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 6 Feb 2023 05:32:05 -0800 Subject: [PATCH 15/49] fix split --- src/bustools_count.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index b5e50d5..2347cf4 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -24,8 +24,6 @@ void bustools_count(Bustools_opt &opt) { u_map_ txnames; auto txnames_split = txnames; // copy std::vector tx_split; - tx_split.reserve(txnames_split.size()); - for (auto x : txnames_split) tx_split.push_back(txnames[x.first]); parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -66,6 +64,10 @@ void bustools_count(Bustools_opt &opt) { // If we need to split matrix if (count_split) { parseTranscripts(opt.count_split, txnames_split); // subset of txnames + tx_split.reserve(txnames_split.size()); + for (auto x : txnames_split) { + if (txnames.count(x.first)) 
tx_split.push_back(txnames[x.first]); + } of_2.open(mtx_ofn_split_2); of_A.open(mtx_ofn_split_A); of_2 << ssHeader.str(); From 73bf6169d7ef12a49ea72c9894edc20a4cc6c239 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 7 Feb 2023 08:57:00 -0800 Subject: [PATCH 16/49] updates to make 3-matrix sparse --- src/bustools_count.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 2347cf4..e2a63a7 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -414,17 +414,17 @@ void bustools_count(Bustools_opt &opt) { else if (mtx_type == COUNT_SPLIT) val_2 += column_vp[j].second.first; else val_A += column_vp[j].second.first; } - col_map.insert({column_vp[i].first,val}); + if (!count_split || val != 0) col_map.insert({column_vp[i].first,val}); if (count_split) { - col_map_2.insert({column_vp[i].first,val_2}); - col_map_A.insert({column_vp[i].first,val_A}); + if (val_2 != 0) col_map_2.insert({column_vp[i].first,val_2}); + if (val_A != 0) col_map_A.insert({column_vp[i].first,val_A}); } cols.push_back(column_vp[i].first); if (count_split) { - if (val > 0) n_entries++; - if (val_2 > 0) n_entries_2++; - if (val_A > 0) n_entries_A++; + if (val != 0) n_entries++; + if (val_2 != 0) n_entries_2++; + if (val_A != 0) n_entries_A++; } else { n_entries++; } From 2ac7d9f4cd57cc3d951863c42d0c1a4377e38c8e Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 27 Feb 2023 23:25:35 -0800 Subject: [PATCH 17/49] mask length in bustools correct --- src/bustools_correct.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 588b744..eab438c 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,6 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; + uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -276,7 +277,7 @@ void bustools_split_correct(Bustools_opt &opt) bd = p[i]; - uint64_t b = bd.barcode; + uint64_t b = bd.barcode & len_mask; uint64_t bc12 = b & mask_12; uint64_t bc34 = (b >> (2 * len_12)) & mask_34; @@ -366,14 +367,14 @@ void bustools_split_correct(Bustools_opt &opt) if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); if (corrected_12_flag && corrected_34_flag) @@ -533,6 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; + uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -546,7 +548,7 @@ void bustools_correct(Bustools_opt &opt) for (size_t i = 0; i < rc; i++) { bd = p[i]; - auto it = wbc.find(bd.barcode); + auto it = wbc.find(bd.barcode & len_mask); if (it != wbc.end()) { stat_white++; @@ -554,7 +556,7 @@ void bustools_correct(Bustools_opt &opt) } else { - uint64_t b = bd.barcode; + uint64_t b = bd.barcode & len_mask; uint64_t lb = b & lower_mask; uint64_t ub = (b >> (2 * bc2)) & upper_mask; uint64_t lbc = 
0, ubc = 0; @@ -572,14 +574,14 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ub << (2 * bc2)) | lbc; if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); stat_corr++; } @@ -588,14 +590,14 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ubc << (2 * bc2)) | lb; if (dump_bool) { - if (bd.barcode != old_barcode) + if (bd.barcode & len_mask != old_barcode) { - of << binaryToString(bd.barcode, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode; + of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = bd.barcode & len_mask; } } - bd.barcode = b_corrected; + bd.barcode = b_corrected | (bd.barcode & ~len_mask); bus_out.write((char *)&bd, sizeof(bd)); stat_corr++; } @@ -622,4 +624,4 @@ void bustools_correct(Bustools_opt &opt) delete[] p; p = nullptr; -} \ No newline at end of file +} From 3147f45c899fccb781410e5f688a0878ff903539 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 00:24:15 -0800 Subject: [PATCH 18/49] fix len_mask --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index eab438c..19f688d 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << bclen) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); From 3d3df8818e5fcea880290907d84e5c4ffd50bd55 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 00:39:29 -0800 Subject: [PATCH 19/49] fix len_mask again --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 19f688d..9c1c12b 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = ((1 << (2*bclen)) - 1); // Only include n least significant bits where n=bclen + uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * 
sizeof(BUSData)); From e3b6ff4ea2a9f4e69ed4b446e98898ffb4ffe656 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 21:07:32 -0800 Subject: [PATCH 20/49] undo bitmap and unordered map --- src/Common.cpp | 84 +- src/Common.hpp | 7 +- src/bustools_count.cpp | 6 +- src/robin_hood.h | 2544 ---------------------------------------- 4 files changed, 55 insertions(+), 2586 deletions(-) delete mode 100644 src/robin_hood.h diff --git a/src/Common.cpp b/src/Common.cpp index 9a95e54..b4770bd 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -85,36 +85,59 @@ int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::ve if (ecs.size() == 1) { return ecs[0]; // no work } - - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); - u = Roaring(ecmap[ecs[0]].size(), data); + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); - u &= Roaring(ecmap[ecs[i]].size(), data); + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -192,7 +215,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - Roaring u; // final list of transcripts + std::vector u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -222,9 +245,11 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.add(t); + u.push_back(t); } } + std::sort(u.begin(), u.end()); + u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ 
-235,15 +260,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } @@ -272,13 +291,14 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.add(t); + u.push_back(t); } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } + std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -287,15 +307,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 4cc8596..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,13 +9,12 @@ #include #include #include -#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ robin_hood::unordered_flat_map +#define u_map_ std::unordered_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -186,12 +185,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_ EcMapInv; +typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e2a63a7..e0d125f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,8 +32,7 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); - ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); + ecmapinv.insert({ecmap[ec], ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -88,7 +87,8 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - Roaring u; + std::vector u; + u.reserve(100); std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) 
{ diff --git a/src/robin_hood.h b/src/robin_hood.h deleted file mode 100644 index 0af031f..0000000 --- a/src/robin_hood.h +++ /dev/null @@ -1,2544 +0,0 @@ -// ______ _____ ______ _________ -// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / -// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / -// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / -// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ -// _/_____/ -// -// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// https://github.com/martinus/robin-hood-hashing -// -// Licensed under the MIT License . -// SPDX-License-Identifier: MIT -// Copyright (c) 2018-2021 Martin Ankerl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#ifndef ROBIN_HOOD_H_INCLUDED -#define ROBIN_HOOD_H_INCLUDED - -// see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes - -#include -#include -#include -#include -#include -#include // only to support hash of smart pointers -#include -#include -#include -#include -#if __cplusplus >= 201703L -# include -#endif - -// #define ROBIN_HOOD_LOG_ENABLED -#ifdef ROBIN_HOOD_LOG_ENABLED -# include -# define ROBIN_HOOD_LOG(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_LOG(x) -#endif - -// #define ROBIN_HOOD_TRACE_ENABLED -#ifdef ROBIN_HOOD_TRACE_ENABLED -# include -# define ROBIN_HOOD_TRACE(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_TRACE(x) -#endif - -// #define ROBIN_HOOD_COUNT_ENABLED -#ifdef ROBIN_HOOD_COUNT_ENABLED -# include -# define ROBIN_HOOD_COUNT(x) ++counts().x; -namespace robin_hood { -struct Counts { - uint64_t shiftUp{}; - uint64_t shiftDown{}; -}; -inline std::ostream& operator<<(std::ostream& os, Counts const& c) { - return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; -} - -static Counts& counts() { - static Counts counts{}; - return counts; -} -} // namespace robin_hood -#else -# define ROBIN_HOOD_COUNT(x) -#endif - -// all non-argument macros should use this facility. 
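The PATCH 20 hunks above revert EcMapInv to a std::unordered_map keyed by sorted transcript-id vectors instead of Roaring bitmaps. A self-contained sketch of that keying (illustration only; VecHash is a simplified stand-in for SortedVectorHasher in src/Common.hpp):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <vector>

// Order-dependent combine over the sorted transcript ids; stand-in hasher only.
struct VecHash {
    std::size_t operator()(const std::vector<int32_t>& v) const {
        std::size_t r = 0;
        for (auto x : v) {
            r ^= std::hash<int32_t>{}(x) + 0x9e3779b97f4a7c15ULL + (r << 6) + (r >> 2);
        }
        return r;
    }
};

int main() {
    std::unordered_map<std::vector<int32_t>, int32_t, VecHash> ecmapinv;
    ecmapinv.emplace(std::vector<int32_t>{0, 3, 7}, 0);  // keys stay sorted, as in intersect_ecs
    auto it = ecmapinv.find(std::vector<int32_t>{0, 3, 7});
    std::printf("ec = %d\n", it == ecmapinv.end() ? -1 : it->second);
}
```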
See -// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ -#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() - -// mark unused members with this macro -#define ROBIN_HOOD_UNUSED(identifier) - -// bitness -#if SIZE_MAX == UINT32_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 -#elif SIZE_MAX == UINT64_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 -#else -# error Unsupported bitness -#endif - -// endianess -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#endif - -// inline -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) -#endif - -// exceptions -#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 -#endif - -// count leading/trailing bits -#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) -# ifdef _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -# else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll -# endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) -# endif -#endif - -// fallthrough -#ifndef __has_cpp_attribute // For backwards compatibility -# define __has_cpp_attribute(x) 0 -#endif -#if __has_cpp_attribute(clang::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() -#endif - -// likely/unlikely -#ifdef _MSC_VER -# define ROBIN_HOOD_LIKELY(condition) condition -# define ROBIN_HOOD_UNLIKELY(condition) condition -#else -# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) -# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) -#endif - -// detect if native wchar_t type is availiable in MSVC -#ifdef _MSC_VER -# ifdef _NATIVE_WCHAR_T_DEFINED -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -#endif - -// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr -#ifdef _MSC_VER -# if _MSC_VER <= 1900 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -#endif - -// workaround missing "is_trivially_copyable" in g++ < 5.0 -// See https://stackoverflow.com/a/31798726/48181 -#if defined(__GNUC__) && __GNUC__ < 5 -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) -#else -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value -#endif - -// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() -#endif - -namespace robin_hood { - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) -# define ROBIN_HOOD_STD std -#else - -// c++11 compatibility layer -namespace ROBIN_HOOD_STD { -template -struct alignment_of - : std::integral_constant::type)> {}; - -template -class integer_sequence { -public: - using value_type = T; - static_assert(std::is_integral::value, "not integral type"); - static constexpr std::size_t size() noexcept { - return sizeof...(Ints); - } -}; -template -using index_sequence = integer_sequence; - -namespace detail_ { -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); - - template - struct IntSeqCombiner; - - template - struct IntSeqCombiner, integer_sequence> { - using TResult = integer_sequence; - }; - - using TResult = - typename IntSeqCombiner::TResult, - typename IntSeqImpl::TResult>::TResult; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; -} // namespace detail_ - -template -using make_integer_sequence = typename detail_::IntSeqImpl::TResult; - -template -using make_index_sequence = make_integer_sequence; - -template -using index_sequence_for = make_index_sequence; - -} // namespace ROBIN_HOOD_STD - -#endif - -namespace detail { - -// make sure we static_cast to the correct type for hash_int -#if ROBIN_HOOD(BITNESS) == 64 -using SizeT = uint64_t; -#else -using SizeT = uint32_t; -#endif - -template -T rotr(T x, unsigned k) { - return (x >> k) | (x << (8U * sizeof(T) - k)); -} - -// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to -// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with -// care! 
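The ROBIN_HOOD(x) indirection defined above turns each per-platform flag into a function-like macro that reads like a value at the use site. A tiny stand-alone imitation (DEMO and DEMO_PRIVATE_DEFINITION_* are local names for illustration, not the header's macros):

```cpp
#include <cstdint>
#include <cstdio>

#define DEMO(x) DEMO_PRIVATE_DEFINITION_##x()
#if SIZE_MAX == UINT64_MAX
# define DEMO_PRIVATE_DEFINITION_BITNESS() 64
#else
# define DEMO_PRIVATE_DEFINITION_BITNESS() 32
#endif

int main() {
    std::printf("bitness: %d\n", DEMO(BITNESS)); // 64 on a 64-bit target
}
```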
-template -inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { - return reinterpret_cast(ptr); -} - -template -inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { - return reinterpret_cast(ptr); -} - -// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other -// inlinings more difficult. Throws are also generally the slow path. -template -[[noreturn]] ROBIN_HOOD(NOINLINE) -#if ROBIN_HOOD(HAS_EXCEPTIONS) - void doThrow(Args&&... args) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) - throw E(std::forward(args)...); -} -#else - void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { - abort(); -} -#endif - -template -T* assertNotNull(T* t, Args&&... args) { - if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { - doThrow(std::forward(args)...); - } - return t; -} - -template -inline T unaligned_load(void const* ptr) noexcept { - // using memcpy so we don't get into unaligned load problems. - // compiler should optimize this very well anyways. - T t; - std::memcpy(&t, ptr, sizeof(T)); - return t; -} - -// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, -// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a -// pointer. -template -class BulkPoolAllocator { -public: - BulkPoolAllocator() noexcept = default; - - // does not copy anything, just creates a new allocator. - BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept - : mHead(nullptr) - , mListForFree(nullptr) {} - - BulkPoolAllocator(BulkPoolAllocator&& o) noexcept - : mHead(o.mHead) - , mListForFree(o.mListForFree) { - o.mListForFree = nullptr; - o.mHead = nullptr; - } - - BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { - reset(); - mHead = o.mHead; - mListForFree = o.mListForFree; - o.mListForFree = nullptr; - o.mHead = nullptr; - return *this; - } - - BulkPoolAllocator& - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { - // does not do anything - return *this; - } - - ~BulkPoolAllocator() noexcept { - reset(); - } - - // Deallocates all allocated memory. - void reset() noexcept { - while (mListForFree) { - T* tmp = *mListForFree; - ROBIN_HOOD_LOG("std::free") - std::free(mListForFree); - mListForFree = reinterpret_cast_no_cast_align_warning(tmp); - } - mHead = nullptr; - } - - // allocates, but does NOT initialize. Use in-place new constructor, e.g. - // T* obj = pool.allocate(); - // ::new (static_cast(obj)) T(); - T* allocate() { - T* tmp = mHead; - if (!tmp) { - tmp = performAllocation(); - } - - mHead = *reinterpret_cast_no_cast_align_warning(tmp); - return tmp; - } - - // does not actually deallocate but puts it in store. - // make sure you have already called the destructor! e.g. with - // obj->~T(); - // pool.deallocate(obj); - void deallocate(T* obj) noexcept { - *reinterpret_cast_no_cast_align_warning(obj) = mHead; - mHead = obj; - } - - // Adds an already allocated block of memory to the allocator. This allocator is from now on - // responsible for freeing the data (with free()). If the provided data is not large enough to - // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
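detail::unaligned_load above is the memcpy idiom that lets the table read several info bytes at once (as try_increase_info does with 8-byte loads) without alignment undefined behaviour. Restated as a small sketch, for illustration only:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Local restatement of the unaligned_load idiom, not the header's code.
template <typename T>
T unaligned_load(const void* p) noexcept {
    T t;
    std::memcpy(&t, p, sizeof(T)); // compilers lower this to a plain load
    return t;
}

int main() {
    unsigned char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    auto v = unaligned_load<std::uint64_t>(buf + 1); // offset 1 is not 8-byte aligned
    std::printf("%llx\n", static_cast<unsigned long long>(v));
}
```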
- void addOrFree(void* ptr, const size_t numBytes) noexcept { - // calculate number of available elements in ptr - if (numBytes < ALIGNMENT + ALIGNED_SIZE) { - // not enough data for at least one element. Free and return. - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } else { - ROBIN_HOOD_LOG("add to buffer") - add(ptr, numBytes); - } - } - - void swap(BulkPoolAllocator& other) noexcept { - using std::swap; - swap(mHead, other.mHead); - swap(mListForFree, other.mListForFree); - } - -private: - // iterates the list of allocated memory to calculate how many to alloc next. - // Recalculating this each time saves us a size_t member. - // This ignores the fact that memory blocks might have been added manually with addOrFree. In - // practice, this should not matter much. - ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { - auto tmp = mListForFree; - size_t numAllocs = MinNumAllocs; - - while (numAllocs * 2 <= MaxNumAllocs && tmp) { - auto x = reinterpret_cast(tmp); - tmp = *x; - numAllocs *= 2; - } - - return numAllocs; - } - - // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). - void add(void* ptr, const size_t numBytes) noexcept { - const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; - - auto data = reinterpret_cast(ptr); - - // link free list - auto x = reinterpret_cast(data); - *x = mListForFree; - mListForFree = data; - - // create linked list for newly allocated data - auto* const headT = - reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); - - auto* const head = reinterpret_cast(headT); - - // Visual Studio compiler automatically unrolls this loop, which is pretty cool - for (size_t i = 0; i < numElements; ++i) { - *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = - head + (i + 1) * ALIGNED_SIZE; - } - - // last one points to 0 - *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = - mHead; - mHead = headT; - } - - // Called when no memory is available (mHead == 0). - // Don't inline this slow path. - ROBIN_HOOD(NOINLINE) T* performAllocation() { - size_t const numElementsToAlloc = calcNumElementsToAlloc(); - - // alloc new memory: [prev |T, T, ... T] - size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE - << " * " << numElementsToAlloc) - add(assertNotNull(std::malloc(bytes)), bytes); - return mHead; - } - - // enforce byte alignment of the T's -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) - static constexpr size_t ALIGNMENT = - (std::max)(std::alignment_of::value, std::alignment_of::value); -#else - static const size_t ALIGNMENT = - (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) - ? ROBIN_HOOD_STD::alignment_of::value - : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround -#endif - - static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; - - static_assert(MinNumAllocs >= 1, "MinNumAllocs"); - static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); - static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); - static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); - static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); - - T* mHead{nullptr}; - T** mListForFree{nullptr}; -}; - -template -struct NodeAllocator; - -// dummy allocator that does nothing -template -struct NodeAllocator { - - // we are not using the data, so just free it. 
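BulkPoolAllocator's allocate() and deallocate() shown above thread freed nodes into an intrusive free list and hand them back before calling malloc again. A minimal sketch of that idea (not the header's implementation; block size must be at least sizeof(void*)):

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>

struct FreeList {
    void* head = nullptr;
    void* take(std::size_t sz) {
        if (head == nullptr) return std::malloc(sz); // pool empty: fall back to malloc
        void* p = head;
        head = *static_cast<void**>(p);              // pop: the block's first bytes hold the next pointer
        return p;
    }
    void give(void* p) {
        *static_cast<void**>(p) = head;              // push without calling free
        head = p;
    }
};

int main() {
    FreeList pool;
    void* a = pool.take(64);
    pool.give(a);
    void* b = pool.take(64);                         // hands back the same block
    std::printf("reused: %d\n", static_cast<int>(a == b));
    std::free(b);
}
```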
- void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } -}; - -template -struct NodeAllocator : public BulkPoolAllocator {}; - -// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making -// my own here. -namespace swappable { -#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) -using std::swap; -template -struct nothrow { - static const bool value = noexcept(swap(std::declval(), std::declval())); -}; -#else -template -struct nothrow { - static const bool value = std::is_nothrow_swappable::value; -}; -#endif -} // namespace swappable - -} // namespace detail - -struct is_transparent_tag {}; - -// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, -// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is -// also tested. -template -struct pair { - using first_type = T1; - using second_type = T2; - - template ::value && - std::is_default_constructible::value>::type> - constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) - : first() - , second() {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair const& o) noexcept( - noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) - : first(o.first) - , second(o.second) {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair&& o) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(o.first)) - , second(std::move(o.second)) {} - - constexpr pair(T1&& a, T2&& b) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(a)) - , second(std::move(b)) {} - - template - constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( - std::declval()))) && noexcept(T2(std::forward(std::declval())))) - : first(std::forward(a)) - , second(std::forward(b)) {} - - template - // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" - // if this constructor is constexpr -#if !ROBIN_HOOD(BROKEN_CONSTEXPR) - constexpr -#endif - pair(std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple - b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) - : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) { - } - - // constructor called from the std::piecewise_construct_t ctor - template - pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( - noexcept(T1(std::forward(std::get( - std::declval&>()))...)) && noexcept(T2(std:: - forward(std::get( - std::declval&>()))...))) - : first(std::forward(std::get(a))...) - , second(std::forward(std::get(b))...) { - // make visual studio compiler happy about warning about unused a & b. - // Visual studio's pair implementation disables warning 4100. 
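The comment above gives the reason for the custom robin_hood::pair: with trivial members it remains trivially copyable, so the table may legally relocate nodes with std::memcpy. A one-line check (assumes the vendored robin_hood.h is on the include path):

```cpp
#include <type_traits>
#include "robin_hood.h"

static_assert(std::is_trivially_copyable<robin_hood::pair<int, int>>::value,
              "robin_hood::pair of trivial types can be moved with std::memcpy");

int main() { return 0; }
```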
- (void)a; - (void)b; - } - - void swap(pair& o) noexcept((detail::swappable::nothrow::value) && - (detail::swappable::nothrow::value)) { - using std::swap; - swap(first, o.first); - swap(second, o.second); - } - - T1 first; // NOLINT(misc-non-private-member-variables-in-classes) - T2 second; // NOLINT(misc-non-private-member-variables-in-classes) -}; - -template -inline void swap(pair& a, pair& b) noexcept( - noexcept(std::declval&>().swap(std::declval&>()))) { - a.swap(b); -} - -template -inline constexpr bool operator==(pair const& x, pair const& y) { - return (x.first == y.first) && (x.second == y.second); -} -template -inline constexpr bool operator!=(pair const& x, pair const& y) { - return !(x == y); -} -template -inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( - std::declval() < std::declval()) && noexcept(std::declval() < - std::declval())) { - return x.first < y.first || (!(y.first < x.first) && x.second < y.second); -} -template -inline constexpr bool operator>(pair const& x, pair const& y) { - return y < x; -} -template -inline constexpr bool operator<=(pair const& x, pair const& y) { - return !(x > y); -} -template -inline constexpr bool operator>=(pair const& x, pair const& y) { - return !(x < y); -} - -inline size_t hash_bytes(void const* ptr, size_t len) noexcept { - static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - static constexpr uint64_t seed = UINT64_C(0xe17a1465); - static constexpr unsigned int r = 47; - - auto const* const data64 = static_cast(ptr); - uint64_t h = seed ^ (len * m); - - size_t const n_blocks = len / 8; - for (size_t i = 0; i < n_blocks; ++i) { - auto k = detail::unaligned_load(data64 + i); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - auto const* const data8 = reinterpret_cast(data64 + n_blocks); - switch (len & 7U) { - case 7: - h ^= static_cast(data8[6]) << 48U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 6: - h ^= static_cast(data8[5]) << 40U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 5: - h ^= static_cast(data8[4]) << 32U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 4: - h ^= static_cast(data8[3]) << 24U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 3: - h ^= static_cast(data8[2]) << 16U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 2: - h ^= static_cast(data8[1]) << 8U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 1: - h ^= static_cast(data8[0]); - h *= m; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - default: - break; - } - - h ^= h >> r; - - // not doing the final step here, because this will be done by keyToIdx anyways - // h *= m; - // h ^= h >> r; - return static_cast(h); -} - -inline size_t hash_int(uint64_t x) noexcept { - // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, - // and doesn't need any special 128bit operations. - x ^= x >> 33U; - x *= UINT64_C(0xff51afd7ed558ccd); - x ^= x >> 33U; - - // not doing the final step here, because this will be done by keyToIdx anyways - // x *= UINT64_C(0xc4ceb9fe1a85ec53); - // x ^= x >> 33U; - return static_cast(x); -} - -// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
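hash_int above is the murmurhash3 64-bit finalizer with the last multiply-and-shift deliberately dropped, because keyToIdx later applies its own multiply and shift; hash_bytes is a murmur-style block hash over raw bytes. A small, hedged illustration of that mixing step (the constant is copied from the code above, everything else is mine, and the bucket selection here is simplified to a plain mask):

    #include <cstdint>
    #include <cstdio>

    // Same mixing as robin_hood's hash_int (murmurhash3 finalizer; the final
    // step is deliberately omitted because keyToIdx applies another multiply+shift).
    static inline uint64_t mix_int(uint64_t x) {
        x ^= x >> 33U;
        x *= UINT64_C(0xff51afd7ed558ccd);
        x ^= x >> 33U;
        return x;
    }

    int main() {
        // Sequential keys land in very different buckets after mixing.
        const uint64_t mask = 0xFF; // pretend the table has 256 slots
        for (uint64_t k = 0; k < 4; ++k) {
            std::printf("key %llu -> bucket %llu\n",
                        (unsigned long long)k,
                        (unsigned long long)(mix_int(k) & mask));
        }
    }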
-template -struct hash : public std::hash { - size_t operator()(T const& obj) const - noexcept(noexcept(std::declval>().operator()(std::declval()))) { - // call base hash - auto result = std::hash::operator()(obj); - // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); - } -}; - -template -struct hash> { - size_t operator()(std::basic_string const& str) const noexcept { - return hash_bytes(str.data(), sizeof(CharT) * str.size()); - } -}; - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -template -struct hash> { - size_t operator()(std::basic_string_view const& sv) const noexcept { - return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); - } -}; -#endif - -template -struct hash { - size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); - } -}; - -template -struct hash> { - size_t operator()(std::unique_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash> { - size_t operator()(std::shared_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash::value>::type> { - size_t operator()(Enum e) const noexcept { - using Underlying = typename std::underlying_type::type; - return hash{}(static_cast(e)); - } -}; - -#define ROBIN_HOOD_HASH_INT(T) \ - template <> \ - struct hash { \ - size_t operator()(T const& obj) const noexcept { \ - return hash_int(static_cast(obj)); \ - } \ - } - -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuseless-cast" -#endif -// see https://en.cppreference.com/w/cpp/utility/hash -ROBIN_HOOD_HASH_INT(bool); -ROBIN_HOOD_HASH_INT(char); -ROBIN_HOOD_HASH_INT(signed char); -ROBIN_HOOD_HASH_INT(unsigned char); -ROBIN_HOOD_HASH_INT(char16_t); -ROBIN_HOOD_HASH_INT(char32_t); -#if ROBIN_HOOD(HAS_NATIVE_WCHART) -ROBIN_HOOD_HASH_INT(wchar_t); -#endif -ROBIN_HOOD_HASH_INT(short); -ROBIN_HOOD_HASH_INT(unsigned short); -ROBIN_HOOD_HASH_INT(int); -ROBIN_HOOD_HASH_INT(unsigned int); -ROBIN_HOOD_HASH_INT(long); -ROBIN_HOOD_HASH_INT(long long); -ROBIN_HOOD_HASH_INT(unsigned long); -ROBIN_HOOD_HASH_INT(unsigned long long); -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif -namespace detail { - -template -struct void_type { - using type = void; -}; - -template -struct has_is_transparent : public std::false_type {}; - -template -struct has_is_transparent::type> - : public std::true_type {}; - -// using wrapper classes for hash and key_equal prevents the diamond problem when the same type -// is used. see https://stackoverflow.com/a/28771920/48181 -template -struct WrapHash : public T { - WrapHash() = default; - explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -template -struct WrapKeyEqual : public T { - WrapKeyEqual() = default; - explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -// A highly optimized hashmap implementation, using the Robin Hood algorithm. -// -// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but -// be about 2x faster in most cases and require much less allocations. -// -// This implementation uses the following memory layout: -// -// [Node, Node, ... Node | info, info, ... infoSentinel ] -// -// * Node: either a DataNode that directly has the std::pair as member, -// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use -// depends on how fast the swap() operation is. Heuristically, this is automatically choosen -// based on sizeof(). there are always 2^n Nodes. -// -// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. -// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the -// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it -// actually belongs to the previous position and was pushed out because that place is already -// taken. -// -// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the -// need for a idx variable. -// -// According to STL, order of templates has effect on throughput. That's why I've moved the -// boolean to the front. -// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ -template -class Table - : public WrapHash, - public WrapKeyEqual, - detail::NodeAllocator< - typename std::conditional< - std::is_void::value, Key, - robin_hood::pair::type, T>>::type, - 4, 16384, IsFlat> { -public: - static constexpr bool is_flat = IsFlat; - static constexpr bool is_map = !std::is_void::value; - static constexpr bool is_set = !is_map; - static constexpr bool is_transparent = - has_is_transparent::value && has_is_transparent::value; - - using key_type = Key; - using mapped_type = T; - using value_type = typename std::conditional< - is_set, Key, - robin_hood::pair::type, T>>::type; - using size_type = size_t; - using hasher = Hash; - using key_equal = KeyEqual; - using Self = Table; - -private: - static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, - "MaxLoadFactor100 needs to be >10 && < 100"); - - using WHash = WrapHash; - using WKeyEqual = WrapKeyEqual; - - // configuration defaults - - // make sure we have 8 elements, needed to quickly rehash mInfo - static constexpr size_t InitialNumElements = sizeof(uint64_t); - static constexpr uint32_t InitialInfoNumBits = 5; - static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr size_t InfoMask = InitialInfoInc - 1U; - static constexpr uint8_t InitialInfoHashShift = 0; - using DataPool = detail::NodeAllocator; - - // type needs to be wider than uint8_t. - using InfoType = uint32_t; - - // DataNode //////////////////////////////////////////////////////// - - // Primary template for the data node. We have special implementations for small and big - // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these - // on the heap so swap merely swaps a pointer. - template - class DataNode {}; - - // Small: just allocate on the stack. - template - class DataNode final { - public: - template - explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( - noexcept(value_type(std::forward(args)...))) - : mData(std::forward(args)...) 
{} - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( - std::is_nothrow_move_constructible::value) - : mData(std::move(n.mData)) {} - - // doesn't do anything - void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} - void destroyDoNotDeallocate() noexcept {} - - value_type const* operator->() const noexcept { - return &mData; - } - value_type* operator->() noexcept { - return &mData; - } - - const value_type& operator*() const noexcept { - return mData; - } - - value_type& operator*() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData.second; - } - - void swap(DataNode& o) noexcept( - noexcept(std::declval().swap(std::declval()))) { - mData.swap(o.mData); - } - - private: - value_type mData; - }; - - // big object: allocate on heap. - template - class DataNode { - public: - template - explicit DataNode(M& map, Args&&... args) - : mData(map.allocate()) { - ::new (static_cast(mData)) value_type(std::forward(args)...); - } - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept - : mData(std::move(n.mData)) {} - - void destroy(M& map) noexcept { - // don't deallocate, just put it into list of datapool. - mData->~value_type(); - map.deallocate(mData); - } - - void destroyDoNotDeallocate() noexcept { - mData->~value_type(); - } - - value_type const* operator->() const noexcept { - return mData; - } - - value_type* operator->() noexcept { - return mData; - } - - const value_type& operator*() const { - return *mData; - } - - value_type& operator*() { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData->second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData->second; - } - - void swap(DataNode& o) noexcept { - using std::swap; - swap(mData, o.mData); - } - - private: - value_type* mData; - }; - - using Node = DataNode; - - // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) - ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { - return n.getFirst(); - } - - // in case we have void mapped_type, we are not using a pair, thus we just route k through. - // No need to disable this because it's just not used if not applicable. 
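The getFirst/getFirstConst overloads above exist because one Table type backs both maps and sets: when the mapped type T is void the stored value_type is just Key, otherwise it is a key/value pair (robin_hood uses its own trivially copyable pair so flat nodes can be copied byte-wise). A hedged sketch of that std::conditional selection, with std::pair standing in for robin_hood::pair and illustrative names:

    #include <type_traits>
    #include <utility>

    template <typename Key, typename T>
    struct StoredValue {
        static constexpr bool is_set = std::is_void<T>::value;
        // void mapped type -> store the key itself; otherwise store a key/value pair.
        using type = typename std::conditional<is_set, Key, std::pair<Key, T>>::type;
    };

    // "set" flavour stores the key, "map" flavour stores a pair.
    static_assert(std::is_same<StoredValue<int, void>::type, int>::value,
                  "set stores Key directly");
    static_assert(std::is_same<StoredValue<int, double>::type, std::pair<int, double>>::value,
                  "map stores a key/value pair");

    int main() {}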
- ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { - return k; - } - - // in case we have non-void mapped_type, we have a standard robin_hood::pair - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, key_type const&>::type - getFirstConst(value_type const& vt) const noexcept { - return vt.first; - } - - // Cloner ////////////////////////////////////////////////////////// - - template - struct Cloner; - - // fast path: Just copy data, without allocating anything. - template - struct Cloner { - void operator()(M const& source, M& target) const { - auto const* const src = reinterpret_cast(source.mKeyVals); - auto* tgt = reinterpret_cast(target.mKeyVals); - auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); - std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); - } - }; - - template - struct Cloner { - void operator()(M const& s, M& t) const { - auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); - std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); - - for (size_t i = 0; i < numElementsWithBuffer; ++i) { - if (t.mInfo[i]) { - ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); - } - } - } - }; - - // Destroyer /////////////////////////////////////////////////////// - - template - struct Destroyer {}; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - } - }; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroy(m); - n.~Node(); - } - } - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroyDoNotDeallocate(); - n.~Node(); - } - } - } - }; - - // Iter //////////////////////////////////////////////////////////// - - struct fast_forward_tag {}; - - // generic iterator for both const_iterator and iterator. - template - // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) - class Iter { - private: - using NodePtr = typename std::conditional::type; - - public: - using difference_type = std::ptrdiff_t; - using value_type = typename Self::value_type; - using reference = typename std::conditional::type; - using pointer = typename std::conditional::type; - using iterator_category = std::forward_iterator_tag; - - // default constructed iterator can be compared to itself, but WON'T return true when - // compared to end(). - Iter() = default; - - // Rule of zero: nothing specified. The conversion constructor is only enabled for - // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. - - // Conversion constructor from iterator to const_iterator. 
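The Cloner specialization above copies the entire node-plus-info allocation with a single std::copy over raw bytes, which is only valid because the flat map's nodes are trivially copyable; the other specialization falls back to per-element placement-new. A toy illustration of that fast path on a plain array (the names are mine):

    #include <cstdio>
    #include <cstring>
    #include <type_traits>

    struct FlatNode { int key; float value; };          // trivially copyable payload
    static_assert(std::is_trivially_copyable<FlatNode>::value,
                  "byte-wise copy is only valid for trivially copyable nodes");

    int main() {
        FlatNode src[4] = {{1, 1.0f}, {2, 2.0f}, {3, 3.0f}, {4, 4.0f}};
        FlatNode dst[4];
        std::memcpy(dst, src, sizeof(src));             // the "fast path": one bulk copy
        std::printf("%d %.1f\n", dst[2].key, dst[2].value);
    }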
- template ::type> - // NOLINTNEXTLINE(hicpp-explicit-conversions) - Iter(Iter const& other) noexcept - : mKeyVals(other.mKeyVals) - , mInfo(other.mInfo) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr, - fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) { - fastForward(); - } - - template ::type> - Iter& operator=(Iter const& other) noexcept { - mKeyVals = other.mKeyVals; - mInfo = other.mInfo; - return *this; - } - - // prefix increment. Undefined behavior if we are at end()! - Iter& operator++() noexcept { - mInfo++; - mKeyVals++; - fastForward(); - return *this; - } - - Iter operator++(int) noexcept { - Iter tmp = *this; - ++(*this); - return tmp; - } - - reference operator*() const { - return **mKeyVals; - } - - pointer operator->() const { - return &**mKeyVals; - } - - template - bool operator==(Iter const& o) const noexcept { - return mKeyVals == o.mKeyVals; - } - - template - bool operator!=(Iter const& o) const noexcept { - return mKeyVals != o.mKeyVals; - } - - private: - // fast forward to the next non-free info byte - // I've tried a few variants that don't depend on intrinsics, but unfortunately they are - // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. - void fastForward() noexcept { - size_t n = 0; - while (0U == (n = detail::unaligned_load(mInfo))) { - mInfo += sizeof(size_t); - mKeyVals += sizeof(size_t); - } -#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) - // we know for certain that within the next 8 bytes we'll find a non-zero one. - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 4; - mKeyVals += 4; - } - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 2; - mKeyVals += 2; - } - if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { - mInfo += 1; - mKeyVals += 1; - } -#else -# if ROBIN_HOOD(LITTLE_ENDIAN) - auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; -# else - auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; -# endif - mInfo += inc; - mKeyVals += inc; -#endif - } - - friend class Table; - NodePtr mKeyVals{nullptr}; - uint8_t const* mInfo{nullptr}; - }; - - //////////////////////////////////////////////////////////////////// - - // highly performance relevant code. - // Lower bits are used for indexing into the array (2^n size) - // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. - template - void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // In addition to whatever hash is used, add another mul & shift so we get better hashing. - // This serves as a bad hash prevention, if the given data is - // badly mixed. - auto h = static_cast(WHash::operator()(key)); - - h *= mHashMultiplier; - h ^= h >> 33U; - - // the lower InitialInfoNumBits are reserved for info. - *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); - *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; - } - - // forwards the index by one, wrapping around at the end - void next(InfoType* info, size_t* idx) const noexcept { - *idx = *idx + 1; - *info += mInfoInc; - } - - void nextWhileLess(InfoType* info, size_t* idx) const noexcept { - // unrolling this by hand did not bring any speedups. - while (*info < mInfo[*idx]) { - next(info, idx); - } - } - - // Shift everything up by one element. Tries to move stuff around. 
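keyToIdx above multiplies the user hash by mHashMultiplier, folds the high bits down, and then splits the result in two: the low InitialInfoNumBits seed the per-slot info byte and the remaining bits, masked by mMask, select the bucket. A simplified stand-alone version of that split, using the same constants as the snippet above (InitialInfoNumBits = 5, InitialInfoInc = 32):

    #include <cstdint>
    #include <cstdio>

    struct IdxInfo { uint64_t idx; uint32_t info; };

    IdxInfo key_to_idx(uint64_t hash, uint64_t hashMultiplier, uint64_t mask,
                       uint32_t infoInc, uint32_t infoHashShift) {
        const uint32_t InitialInfoNumBits = 5;
        const uint64_t InfoMask = (1U << InitialInfoNumBits) - 1U;     // low 5 bits

        uint64_t h = hash * hashMultiplier;
        h ^= h >> 33U;

        IdxInfo r;
        r.info = infoInc + (uint32_t)((h & InfoMask) >> infoHashShift); // seeds the distance counter
        r.idx  = (h >> InitialInfoNumBits) & mask;                      // bucket index
        return r;
    }

    int main() {
        IdxInfo r = key_to_idx(/*hash=*/12345, UINT64_C(0xc4ceb9fe1a85ec53),
                               /*mask=*/255, /*infoInc=*/32, /*infoHashShift=*/0);
        std::printf("idx=%llu info=%u\n", (unsigned long long)r.idx, r.info);
    }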
- void - shiftUp(size_t startIdx, - size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { - auto idx = startIdx; - ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); - while (--idx != insertion_idx) { - mKeyVals[idx] = std::move(mKeyVals[idx - 1]); - } - - idx = startIdx; - while (idx != insertion_idx) { - ROBIN_HOOD_COUNT(shiftUp) - mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); - if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - --idx; - } - } - - void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { - // until we find one that is either empty or has zero offset. - // TODO(martinus) we don't need to move everything, just the last one for the same - // bucket. - mKeyVals[idx].destroy(*this); - - // until we find one that is either empty or has zero offset. - while (mInfo[idx + 1] >= 2 * mInfoInc) { - ROBIN_HOOD_COUNT(shiftDown) - mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); - mKeyVals[idx] = std::move(mKeyVals[idx + 1]); - ++idx; - } - - mInfo[idx] = 0; - // don't destroy, we've moved it - // mKeyVals[idx].destroy(*this); - mKeyVals[idx].~Node(); - } - - // copy of find(), except that it returns iterator instead of const_iterator. - template - ROBIN_HOOD(NODISCARD) - size_t findIdx(Other const& key) const { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - do { - // unrolling this twice gives a bit of a speedup. More unrolling did not help. - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found! - return mMask == 0 ? 0 - : static_cast(std::distance( - mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); - } - - void cloneData(const Table& o) { - Cloner()(o, *this); - } - - // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return True on success, false if something went wrong - void insert_move(Node&& keyval) { - // we don't retry, fail if overflowing - // don't need to check max num elements - if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); - } - - size_t idx{}; - InfoType info{}; - keyToIdx(keyval.getFirst(), &idx, &info); - - // skip forward. Use <= because we are certain that the element is not there. - while (info <= mInfo[idx]) { - idx = idx + 1; - info += mInfoInc; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = static_cast(info); - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(std::move(keyval)); - } else { - shiftUp(idx, insertion_idx); - l = std::move(keyval); - } - - // put at empty spot - mInfo[insertion_idx] = insertion_info; - - ++mNumElements; - } - -public: - using iterator = Iter; - using const_iterator = Iter; - - Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) - : WHash() - , WKeyEqual() { - ROBIN_HOOD_TRACE(this) - } - - // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
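shiftDown above is backward-shift deletion: after the erased node is destroyed, every following entry whose info byte shows it was displaced (info >= 2 * mInfoInc) slides one slot back and its info is decremented, so probe chains stay dense and no tombstones are needed. A toy version on plain arrays, assuming info == 0 means empty and each step of mInfoInc means one slot further from home:

    #include <cstdio>
    #include <vector>

    // Toy backward-shift delete: info[i] == 0 means empty, otherwise
    // info[i] = INFO_INC * (1 + distance from the slot the key hashed to).
    static const unsigned INFO_INC = 32;

    void backward_shift_erase(std::vector<int>& keys, std::vector<unsigned>& info, size_t idx) {
        while (info[idx + 1] >= 2 * INFO_INC) {   // next entry was displaced: pull it back
            info[idx] = info[idx + 1] - INFO_INC;
            keys[idx] = keys[idx + 1];
            ++idx;
        }
        info[idx] = 0;                            // the chain ends here; slot is now empty
    }

    int main() {
        // slots:               0    1    2    3   4 (sentinel)
        std::vector<int>      keys{10,  11,  12,   0,  0};
        std::vector<unsigned> info{32,  64,  96,   0,  1};  // 11 and 12 were pushed out of slot 0
        backward_shift_erase(keys, info, 0);                // erase the key in slot 0
        for (size_t i = 0; i + 1 < keys.size(); ++i)
            std::printf("slot %zu: key=%d info=%u\n", i, keys[i], info[i]);
    }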
- // This tremendously speeds up ctor & dtor of a map that never receives an element. The - // penalty is payed at the first insert, and not before. Lookup of this empty map works - // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the - // standard, but we can ignore it. - explicit Table( - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - } - - template - Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, - const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(first, last); - } - - Table(std::initializer_list initlist, - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(initlist.begin(), initlist.end()); - } - - Table(Table&& o) noexcept - : WHash(std::move(static_cast(o))) - , WKeyEqual(std::move(static_cast(o))) - , DataPool(std::move(static_cast(o))) { - ROBIN_HOOD_TRACE(this) - if (o.mMask) { - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - // set other's mask to 0 so its destructor won't do anything - o.init(); - } - } - - Table& operator=(Table&& o) noexcept { - ROBIN_HOOD_TRACE(this) - if (&o != this) { - if (o.mMask) { - // only move stuff if the other map actually has some data - destroy(); - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - WHash::operator=(std::move(static_cast(o))); - WKeyEqual::operator=(std::move(static_cast(o))); - DataPool::operator=(std::move(static_cast(o))); - - o.init(); - - } else { - // nothing in the other map => just clear us. - clear(); - } - } - return *this; - } - - Table(const Table& o) - : WHash(static_cast(o)) - , WKeyEqual(static_cast(o)) - , DataPool(static_cast(o)) { - ROBIN_HOOD_TRACE(this) - if (!o.empty()) { - // not empty: create an exact copy. it is also possible to just iterate through all - // elements and insert them, but copying is probably faster. - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mHashMultiplier = o.mHashMultiplier; - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - // no need for calloc because clonData does memcpy - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - } - } - - // Creates a copy of the given map. Copy constructor of each entry is used. 
- // Not sure why clang-tidy thinks this doesn't handle self assignment, it does - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - Table& operator=(Table const& o) { - ROBIN_HOOD_TRACE(this) - if (&o == this) { - // prevent assigning of itself - return *this; - } - - // we keep using the old allocator and not assign the new one, because we want to keep - // the memory available. when it is the same size. - if (o.empty()) { - if (0 == mMask) { - // nothing to do, we are empty too - return *this; - } - - // not empty: destroy what we have there - // clear also resets mInfo to 0, that's sometimes not necessary. - destroy(); - init(); - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - - return *this; - } - - // clean up old stuff - Destroyer::value>{}.nodes(*this); - - if (mMask != o.mMask) { - // no luck: we don't have the same array size allocated, so we need to realloc. - if (0 != mMask) { - // only deallocate if we actually have data! - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - - // no need for calloc here because cloneData performs a memcpy. - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - // sentinel is set in cloneData - } - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - mHashMultiplier = o.mHashMultiplier; - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - - return *this; - } - - // Swaps everything between the two maps. - void swap(Table& o) { - ROBIN_HOOD_TRACE(this) - using std::swap; - swap(o, *this); - } - - // Clears all data, without resizing. - void clear() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - // don't do anything! also important because we don't want to write to - // DummyInfoByte::b, even though we would just write 0 to it. - return; - } - - Destroyer::value>{}.nodes(*this); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - // clear everything, then set the sentinel again - uint8_t const z = 0; - std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // Destroys the map and all it's contents. - ~Table() { - ROBIN_HOOD_TRACE(this) - destroy(); - } - - // Checks if both tables contain the same entries. Order is irrelevant. 
- bool operator==(const Table& other) const { - ROBIN_HOOD_TRACE(this) - if (other.size() != size()) { - return false; - } - for (auto const& otherEntry : other) { - if (!has(otherEntry)) { - return false; - } - } - - return true; - } - - bool operator!=(const Table& other) const { - ROBIN_HOOD_TRACE(this) - return !operator==(other); - } - - template - typename std::enable_if::value, Q&>::type operator[](const key_type& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - typename std::enable_if::value, Q&>::type operator[](key_type&& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - void insert(Iter first, Iter last) { - for (; first != last; ++first) { - // value_type ctor needed because this might be called with std::pair's - insert(value_type(*first)); - } - } - - void insert(std::initializer_list ilist) { - for (auto&& vt : ilist) { - insert(std::move(vt)); - } - } - - template - std::pair emplace(Args&&... args) { - ROBIN_HOOD_TRACE(this) - Node n{*this, std::forward(args)...}; - auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); - switch (idxAndState.second) { - case InsertionState::key_found: - n.destroy(*this); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = std::move(n); - break; - - case InsertionState::overflow_error: - n.destroy(*this); - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - iterator emplace_hint(const_iterator position, Args&&... args) { - (void)position; - return emplace(std::forward(args)...).first; - } - - template - std::pair try_emplace(const key_type& key, Args&&... args) { - return try_emplace_impl(key, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& key, Args&&... args) { - return try_emplace_impl(std::move(key), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { - (void)hint; - return try_emplace_impl(key, std::forward(args)...).first; - } - - template - iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { - (void)hint; - return try_emplace_impl(std::move(key), std::forward(args)...).first; - } - - template - std::pair insert_or_assign(const key_type& key, Mapped&& obj) { - return insertOrAssignImpl(key, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& key, Mapped&& obj) { - return insertOrAssignImpl(std::move(key), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(key, std::forward(obj)).first; - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(std::move(key), std::forward(obj)).first; - } - - std::pair insert(const value_type& keyval) { - ROBIN_HOOD_TRACE(this) - return emplace(keyval); - } - - iterator insert(const_iterator hint, const value_type& keyval) { - (void)hint; - return emplace(keyval).first; - } - - std::pair insert(value_type&& keyval) { - return emplace(std::move(keyval)); - } - - iterator insert(const_iterator hint, value_type&& keyval) { - (void)hint; - return emplace(std::move(keyval)).first; - } - - // Returns 1 if key is found, 0 otherwise. - size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type count(const OtherKey& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - return 1U == count(key); - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type contains(const OtherKey& key) const { - return 1U == count(key); - } - - // Returns a reference to the value found for key. - // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q&>::type at(key_type const& key) { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - // Returns a reference to the value found for key. 
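The overload set above deliberately mirrors std::unordered_map: try_emplace constructs the mapped value only when the key is absent, insert_or_assign overwrites it, and operator[] default-constructs it on first access. A short usage sketch, assuming the robin_hood.h added later in this series is on the include path:

    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> gene_counts;

        gene_counts.try_emplace("reads", 1);        // inserted: key was absent
        gene_counts.try_emplace("reads", 99);       // no effect: key already present
        gene_counts.insert_or_assign("reads", 42);  // overwrites the existing value
        gene_counts["umis"] += 7;                   // default-constructs 0, then adds

        for (const auto& kv : gene_counts) {
            std::printf("%s = %d\n", kv.first.c_str(), kv.second);
        }
    }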
- // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q const&>::type at(key_type const& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type // NOLINT(modernize-use-nodiscard) - find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator find(const key_type& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type find(const OtherKey& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator begin() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - return end(); - } - return iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - const_iterator begin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cbegin(); - } - const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - if (empty()) { - return cend(); - } - return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - - iterator end() { - ROBIN_HOOD_TRACE(this) - // no need to supply valid info pointer: end() must not be dereferenced, and only node - // pointer is compared. - return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - const_iterator end() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cend(); - } - const_iterator cend() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - - iterator erase(const_iterator pos) { - ROBIN_HOOD_TRACE(this) - // its safe to perform const cast here - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); - } - - // Erases element at pos, returns iterator to the next element. - iterator erase(iterator pos) { - ROBIN_HOOD_TRACE(this) - // we assume that pos always points to a valid entry, and not end(). 
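The find overloads taking an OtherKey or is_transparent_tag above enable heterogeneous lookup whenever both the hasher and the key-equal functor expose an is_transparent typedef, so a std::string-keyed map can be probed with a std::string_view without materialising a temporary string. A hedged C++17 sketch; StringHash and StringEq are mine, only hash_bytes and unordered_flat_map come from the header:

    #include <cstdio>
    #include <string>
    #include <string_view>
    #include "robin_hood.h"

    struct StringHash {
        using is_transparent = void;                       // opt in to heterogeneous lookup
        size_t operator()(std::string_view sv) const noexcept {
            return robin_hood::hash_bytes(sv.data(), sv.size());
        }
    };
    struct StringEq {
        using is_transparent = void;
        bool operator()(std::string_view a, std::string_view b) const noexcept { return a == b; }
    };

    int main() {
        robin_hood::unordered_flat_map<std::string, int, StringHash, StringEq> m;
        m["barcode"] = 1;
        std::string_view key = "barcode";                  // no std::string temporary needed
        auto it = m.find(key);
        if (it != m.end()) std::printf("%s -> %d\n", it->first.c_str(), it->second);
    }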
- auto const idx = static_cast(pos.mKeyVals - mKeyVals); - - shiftDown(idx); - --mNumElements; - - if (*pos.mInfo) { - // we've backward shifted, return this again - return pos; - } - - // no backward shift, return next element - return ++pos; - } - - size_t erase(const key_type& key) { - ROBIN_HOOD_TRACE(this) - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - // check while info matches with the source idx - do { - if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - shiftDown(idx); - --mNumElements; - return 1; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found to delete - return 0; - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // exactly the same as reserve(c). - void rehash(size_t c) { - // forces a reserve - reserve(c, true); - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. - void reserve(size_t c) { - // reserve, but don't force rehash - reserve(c, false); - } - - // If possible reallocates the map to a smaller one. This frees the underlying table. - // Does not do anything if load_factor is too large for decreasing the table's size. - void compact() { - ROBIN_HOOD_TRACE(this) - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (newSize < mMask + 1) { - rehashPowerOfTwo(newSize, true); - } - } - - size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return mNumElements; - } - - size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(-1); - } - - ROBIN_HOOD(NODISCARD) bool empty() const noexcept { - ROBIN_HOOD_TRACE(this) - return 0 == mNumElements; - } - - float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return MaxLoadFactor100 / 100.0F; - } - - // Average number of elements per bucket. Since we allow only 1 per bucket - float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(size()) / static_cast(mMask + 1); - } - - ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { - ROBIN_HOOD_TRACE(this) - return mMask; - } - - ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { - if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { - return maxElements * MaxLoadFactor100 / 100; - } - - // we might be a bit inprecise, but since maxElements is quite large that doesn't matter - return (maxElements / 100) * MaxLoadFactor100; - } - - ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { - // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load - // 64bit types. 
- return numElements + sizeof(uint64_t); - } - - ROBIN_HOOD(NODISCARD) - size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { - auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); - return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); - } - - // calculation only allowed for 2^n values - ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { -#if ROBIN_HOOD(BITNESS) == 64 - return numElements * sizeof(Node) + calcNumBytesInfo(numElements); -#else - // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. - auto const ne = static_cast(numElements); - auto const s = static_cast(sizeof(Node)); - auto const infos = static_cast(calcNumBytesInfo(numElements)); - - auto const total64 = ne * s + infos; - auto const total = static_cast(total64); - - if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { - throwOverflowError(); - } - return total; -#endif - } - -private: - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - auto it = find(e.first); - return it != end() && it->second == e.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - return find(e) != end(); - } - - void reserve(size_t c, bool forceRehash) { - ROBIN_HOOD_TRACE(this) - auto const minElementsAllowed = (std::max)(c, mNumElements); - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (forceRehash || newSize > mMask + 1) { - rehashPowerOfTwo(newSize, false); - } - } - - // reserves space for at least the specified number of elements. - // only works if numBuckets if power of two - // True on success, false otherwise - void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { - ROBIN_HOOD_TRACE(this) - - Node* const oldKeyVals = mKeyVals; - uint8_t const* const oldInfo = mInfo; - - const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - // resize operation: move stuff - initData(numBuckets); - if (oldMaxElementsWithBuffer > 1) { - for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { - if (oldInfo[i] != 0) { - // might throw an exception, which is really bad since we are in the middle of - // moving stuff. - insert_move(std::move(oldKeyVals[i])); - // destroy the node but DON'T destroy the data. - oldKeyVals[i].~Node(); - } - } - - // this check is not necessary as it's guarded by the previous if, but it helps - // silence g++'s overeager "attempt to free a non-heap object 'map' - // [-Werror=free-nonheap-object]" warning. 
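calcNumBytesInfo, calcNumElementsWithBuffer and calcNumBytesTotal above size the single allocation that holds both the node array and the info bytes: one info byte per slot, a uint64_t for the sentinel and padding, and an overflow buffer of at most 0xFF extra slots. A worked example of that arithmetic, assuming an 8-byte flat node, 256 slots and the default MaxLoadFactor100 of 80 (both assumptions):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const size_t numSlots         = 256;   // always a power of two
        const size_t maxLoadFactor100 = 80;
        const size_t sizeofNode       = 8;

        const size_t maxElementsAllowed = numSlots * maxLoadFactor100 / 100;               // 204
        const size_t elementsWithBuffer = numSlots + std::min<size_t>(maxElementsAllowed, 0xFF); // 460
        const size_t bytesInfo          = elementsWithBuffer + sizeof(uint64_t);            // sentinel + padding
        const size_t bytesTotal         = elementsWithBuffer * sizeofNode + bytesInfo;      // one malloc for both

        std::printf("info bytes: %zu, total bytes: %zu\n", bytesInfo, bytesTotal);
    }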
- if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - // don't destroy old data: put it into the pool instead - if (forceFree) { - std::free(oldKeyVals); - } else { - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); - } - } - } - } - - ROBIN_HOOD(NOINLINE) void throwOverflowError() const { -#if ROBIN_HOOD(HAS_EXCEPTIONS) - throw std::overflow_error("robin_hood::map overflow"); -#else - abort(); -#endif - } - - template - std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - mKeyVals[idxAndState.first].getSecond() = std::forward(obj); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - void initData(size_t max_elements) { - mNumElements = 0; - mMask = max_elements - 1; - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); - - // malloc & zero mInfo. Faster than calloc everything. - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = reinterpret_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); - - // set sentinel - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; - - // Finds key, and if not already present prepares a spot where to pot the key & value. - // This potentially shifts nodes out of the way, updates mInfo and number of inserted - // elements, so the only operation left to do is create/assign a new node at that spot. 
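insertKeyPrepareEmptySpot (directly below) hands back a slot index plus one of the InsertionState values defined above, and every insert-style entry point (operator[], emplace, try_emplace, insert_or_assign) simply switches on that state. A deliberately tiny toy map showing the caller-side dispatch; its prepare() does plain linear probing and never produces the overwrite or overflow states, which are only commented:

    #include <cstdio>
    #include <utility>
    #include <vector>

    // The four outcomes used by the real insertKeyPrepareEmptySpot().
    enum class InsertionState { overflow_error, key_found, new_node, overwrite_node };

    struct ToyMap {
        std::vector<std::pair<int, int>> slots = std::vector<std::pair<int, int>>(16);
        std::vector<bool> used = std::vector<bool>(16, false);

        std::pair<size_t, InsertionState> prepare(int key) {
            size_t idx = static_cast<size_t>(key) % slots.size();
            while (used[idx] && slots[idx].first != key) idx = (idx + 1) % slots.size();
            return {idx, used[idx] ? InsertionState::key_found : InsertionState::new_node};
        }

        int& operator[](int key) {
            auto idxAndState = prepare(key);
            switch (idxAndState.second) {
            case InsertionState::key_found:
                break;                               // value already there, just return it
            case InsertionState::new_node:
                slots[idxAndState.first] = {key, 0}; // real code placement-news a Node here
                used[idxAndState.first] = true;
                break;
            case InsertionState::overwrite_node:     // real code move-assigns over a shifted-out node
            case InsertionState::overflow_error:     // real code throws / aborts
                break;
            }
            return slots[idxAndState.first].second;
        }
    };

    int main() {
        ToyMap m;
        m[3] = 7;
        m[3] += 1;
        std::printf("m[3] = %d\n", m[3]);
    }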
- template - std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { - for (int i = 0; i < 256; ++i) { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - nextWhileLess(&info, &idx); - - // while we potentially have a match - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do NOT insert. - // see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(idx, InsertionState::key_found); - } - next(&info, &idx); - } - - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - if (!increase_size()) { - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - continue; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - if (idx != insertion_idx) { - shiftUp(idx, insertion_idx); - } - // put at empty spot - mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(insertion_idx, idx == insertion_idx - ? InsertionState::new_node - : InsertionState::overwrite_node); - } - - // enough attempts failed, so finally give up. - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - - bool try_increase_info() { - ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements - << ", maxNumElementsAllowed=" - << calcMaxNumElementsAllowed(mMask + 1)) - if (mInfoInc <= 2) { - // need to be > 2 so that shift works (otherwise undefined behavior!) - return false; - } - // we got space left, try to make info smaller - mInfoInc = static_cast(mInfoInc >> 1U); - - // remove one bit of the hash, leaving more space for the distance info. - // This is extremely fast because we can operate on 8 bytes at once. - ++mInfoHashShift; - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - for (size_t i = 0; i < numElementsWithBuffer; i += 8) { - auto val = unaligned_load(mInfo + i); - val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); - std::memcpy(mInfo + i, &val, sizeof(val)); - } - // update sentinel, which might have been cleared out! - mInfo[numElementsWithBuffer] = 1; - - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - return true; - } - - // True if resize was possible, false otherwise - bool increase_size() { - // nothing allocated yet? just allocate InitialNumElements - if (0 == mMask) { - initData(InitialNumElements); - return true; - } - - auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return true; - } - - ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" - << maxNumElementsAllowed << ", load=" - << (static_cast(mNumElements) * 100.0 / - (static_cast(mMask) + 1))) - - if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - // we have to resize, even though there would still be plenty of space left! - // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case - // we have to rehash a few times - nextHashMultiplier(); - rehashPowerOfTwo(mMask + 1, true); - } else { - // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
- rehashPowerOfTwo((mMask + 1) * 2, false); - } - return true; - } - - void nextHashMultiplier() { - // adding an *even* number, so that the multiplier will always stay odd. This is necessary - // so that the hash stays a mixing function (and thus doesn't have any information loss). - mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); - } - - void destroy() { - if (0 == mMask) { - // don't deallocate! - return; - } - - Destroyer::value>{} - .nodesDoNotDeallocate(*this); - - // This protection against not deleting mMask shouldn't be needed as it's sufficiently - // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object 'fm' - // [-Werror=free-nonheap-object] - if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - } - - void init() noexcept { - mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); - mInfo = reinterpret_cast(&mMask); - mNumElements = 0; - mMask = 0; - mMaxNumElementsAllowed = 0; - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // members are sorted so no padding occurs - uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 - Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 - size_t mNumElements = 0; // 8 byte 32 - size_t mMask = 0; // 8 byte 40 - size_t mMaxNumElementsAllowed = 0; // 8 byte 48 - InfoType mInfoInc = InitialInfoInc; // 4 byte 52 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 - // 16 byte 56 if NodeAllocator -}; - -} // namespace detail - -// map - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_flat_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_node_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_map = - detail::Table) <= sizeof(size_t) * 6 && - std::is_nothrow_move_constructible>::value && - std::is_nothrow_move_assignable>::value, - MaxLoadFactor100, Key, T, Hash, KeyEqual>; - -// set - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_flat_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_node_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_set = detail::Table::value && - std::is_nothrow_move_assignable::value, - MaxLoadFactor100, Key, void, Hash, KeyEqual>; - -} // namespace robin_hood - -#endif From 7d4a558f8b08275950d37e60867bbe61676d9bf2 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:17:11 -0800 Subject: [PATCH 21/49] Revert "undo bitmap and unordered map" This reverts commit e3b6ff4ea2a9f4e69ed4b446e98898ffb4ffe656. 
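The diff that follows changes the equivalence-class machinery in two ways: EC transcript sets are handled as Roaring bitmaps (so intersecting classes becomes a bitmap AND), and the EC lookup table EcMapInv becomes a robin_hood::unordered_flat_map keyed by those bitmaps. A hedged sketch of the two pieces together; RoaringHasherSketch is a stand-in for the RoaringHasher declared in Common.hpp, and the include paths are assumptions:

    #include <cstdint>
    #include <cstdio>
    #include <vector>
    #include "roaring.hh"
    #include "robin_hood.h"

    // Stand-in hasher; Common.hpp declares its own RoaringHasher for the same purpose.
    struct RoaringHasherSketch {
        size_t operator()(const Roaring& r) const {
            std::vector<uint32_t> vals(r.cardinality());
            r.toUint32Array(vals.data());            // same Roaring API the patch itself uses
            size_t h = 0;
            for (uint32_t v : vals) h = h * 31 + v;  // simplistic mixing, illustration only
            return h;
        }
    };

    using EcMapInvSketch = robin_hood::unordered_flat_map<Roaring, int32_t, RoaringHasherSketch>;

    int main() {
        std::vector<uint32_t> ec0{1, 2, 3, 7};       // transcript ids of one equivalence class
        std::vector<uint32_t> ec1{2, 3, 9};          // and of another
        Roaring a(ec0.size(), ec0.data());
        Roaring b(ec1.size(), ec1.data());

        Roaring u = a;
        u &= b;                                      // intersection of the two classes: {2, 3}

        EcMapInvSketch ecmapinv;
        ecmapinv.insert({u, 0});                     // register the intersection as EC 0
        std::printf("intersection size = %llu, found = %d\n",
                    (unsigned long long)u.cardinality(),
                    (int)(ecmapinv.find(u) != ecmapinv.end()));
    }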
--- src/Common.cpp | 84 +- src/Common.hpp | 7 +- src/bustools_count.cpp | 6 +- src/robin_hood.h | 2544 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 2586 insertions(+), 55 deletions(-) create mode 100644 src/robin_hood.h diff --git a/src/Common.cpp b/src/Common.cpp index b4770bd..9a95e54 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -85,59 +85,36 @@ int32_t intersect_ecs(const std::vector &ecs, std::vector &u, if (ecs.size() == 1) { return ecs[0]; // no work } - - u.resize(0); - auto &v = ecmap[ecs[0]]; // copy - for (size_t i = 0; i< v.size(); i++) { - u.push_back(v[i]); - } + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); + u = Roaring(ecmap[ecs[0]].size(), data); + for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - const auto &v = ecmap[ecs[i]]; - - int j = 0; - int k = 0; - int l = 0; - int n = u.size(); - int m = v.size(); - // u and v are sorted, j,k,l = 0 - while (j < n && l < m) { - // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m - // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted - if (u[j] < v[l]) { - j++; - } else if (u[j] > v[l]) { - l++; - } else { - // match - if (k < j) { - std::swap(u[k], u[j]); - } - k++; - j++; - i++; - } - } - if (k < n) { - u.resize(k); - } + data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); + u &= Roaring(ecmap[ecs[i]].size(), data); } - if (u.empty()) { + if (u.isEmpty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -215,7 +192,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - std::vector u; // final list of transcripts + Roaring u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -245,11 +222,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.push_back(t); + u.add(t); } } - std::sort(u.begin(), u.end()); - u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -260,9 +235,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; 
+ u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } @@ -291,14 +272,13 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.push_back(t); + u.add(t); } } - if (u.empty()) { + if (u.isEmpty()) { return -1; } - std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -307,9 +287,15 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - ecmap.push_back(u); + uint32_t* u_arr = new uint32_t[u.cardinality()]; + u.toUint32Array(u_arr); + std::vector u_vec; + u_vec.reserve(u.cardinality()); + for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); + delete[] u_arr; + ecmap.push_back(u_vec); std::vector v; - vt2gene(u, genemap, v); + vt2gene(u_vec, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..4cc8596 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,12 +9,13 @@ #include #include #include +#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ std::unordered_map +#define u_map_ robin_hood::unordered_flat_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, @@ -185,12 +186,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; +typedef u_map_ EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d125f..e2a63a7 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,7 +32,8 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - ecmapinv.insert({ecmap[ec], ec}); + uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); + ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); } std::vector> ec2genes; create_ec2genes(ecmap, genemap, ec2genes); @@ -87,8 +88,7 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - std::vector u; - u.reserve(100); + Roaring u; std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { diff --git a/src/robin_hood.h b/src/robin_hood.h new file mode 100644 index 0000000..0af031f --- /dev/null +++ b/src/robin_hood.h @@ -0,0 +1,2544 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ 
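With EcMapInv now keyed on Roaring bitmaps and backed by robin_hood::unordered_flat_map (the u_map_ macro above), equivalence-class lookup becomes a hash of the transcript set itself. The sketch below shows one way such a map can be built, mirroring how bustools_count populates ecmapinv. RoaringBitmapHash is a stand-in for the RoaringHasher declared in Common.hpp (its body is not reproduced in this hunk), and build_inverse is an illustrative helper, not code from the patch.

#include <cstddef>
#include <cstdint>
#include <vector>
#include "roaring.hh"
#include "robin_hood.h"

// Stand-in for the patch's RoaringHasher: any value-based hash works as long
// as equal bitmaps hash equally (Roaring::operator== supplies key equality).
// The real hasher presumably mixes the set bits without the temporary copy.
struct RoaringBitmapHash {
  std::size_t operator()(const Roaring &r) const {
    std::vector<uint32_t> vals(r.cardinality());
    r.toUint32Array(vals.data());
    std::uint64_t h = vals.size();
    for (uint32_t x : vals) {
      h ^= x + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);  // boost-style combine
    }
    return static_cast<std::size_t>(h);
  }
};

using EcMapInvSketch =
    robin_hood::unordered_flat_map<Roaring, int32_t, RoaringBitmapHash>;

// Populate the inverse map from the parsed ec file, as bustools_count now does.
static EcMapInvSketch build_inverse(const std::vector<std::vector<int32_t>> &ecmap) {
  EcMapInvSketch inv;
  inv.reserve(ecmap.size());
  for (int32_t ec = 0; ec < static_cast<int32_t>(ecmap.size()); ++ec) {
    uint32_t *data =
        reinterpret_cast<uint32_t *>(const_cast<int32_t *>(ecmap[ec].data()));
    inv.insert({Roaring(ecmap[ec].size(), data), ec});
  }
  return inv;
}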
___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2021 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include // only to support hash of smart pointers +#include +#include +#include +#include +#if __cplusplus >= 201703L +# include +#endif + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// #define ROBIN_HOOD_TRACE_ENABLED +#ifdef ROBIN_HOOD_TRACE_ENABLED +# include +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_TRACE(x) +#endif + +// #define ROBIN_HOOD_COUNT_ENABLED +#ifdef ROBIN_HOOD_COUNT_ENABLED +# include +# define ROBIN_HOOD_COUNT(x) ++counts().x; +namespace robin_hood { +struct Counts { + uint64_t shiftUp{}; + uint64_t shiftDown{}; +}; +inline std::ostream& operator<<(std::ostream& os, Counts const& c) { + return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; +} + +static Counts& counts() { + static Counts counts{}; + return counts; +} +} // namespace robin_hood +#else +# define ROBIN_HOOD_COUNT(x) +#endif + +// all non-argument macros should use this facility. 
See +// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ +#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#endif + +// inline +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) +#endif + +// exceptions +#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 +#endif + +// count leading/trailing bits +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) +# else +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) +# endif +#endif + +// fallthrough +#ifndef __has_cpp_attribute // For backwards compatibility +# define __has_cpp_attribute(x) 0 +#endif +#if __has_cpp_attribute(clang::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() +#endif + +// likely/unlikely +#ifdef _MSC_VER +# define ROBIN_HOOD_LIKELY(condition) condition +# define ROBIN_HOOD_UNLIKELY(condition) condition +#else +# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) +# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) +#endif + +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + +// workaround missing "is_trivially_copyable" in g++ < 5.0 +// See https://stackoverflow.com/a/31798726/48181 +#if defined(__GNUC__) && __GNUC__ < 5 +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else +# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L +#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() +#endif + +namespace robin_hood { + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) +# define ROBIN_HOOD_STD std +#else + +// c++11 compatibility layer +namespace ROBIN_HOOD_STD { +template +struct alignment_of + : std::integral_constant::type)> {}; + +template +class integer_sequence { +public: + using value_type = T; + static_assert(std::is_integral::value, "not integral type"); + static constexpr std::size_t size() noexcept { + return sizeof...(Ints); + } +}; +template +using index_sequence = integer_sequence; + +namespace detail_ { +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); + + template + struct IntSeqCombiner; + + template + struct IntSeqCombiner, integer_sequence> { + using TResult = integer_sequence; + }; + + using TResult = + typename IntSeqCombiner::TResult, + typename IntSeqImpl::TResult>::TResult; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; + +template +struct IntSeqImpl { + using TValue = T; + static_assert(std::is_integral::value, "not integral type"); + static_assert(Begin >= 0, "unexpected argument (Begin<0)"); + using TResult = integer_sequence; +}; +} // namespace detail_ + +template +using make_integer_sequence = typename detail_::IntSeqImpl::TResult; + +template +using make_index_sequence = make_integer_sequence; + +template +using index_sequence_for = make_index_sequence; + +} // namespace ROBIN_HOOD_STD + +#endif + +namespace detail { + +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; +#else +using SizeT = uint32_t; +#endif + +template +T rotr(T x, unsigned k) { + return (x >> k) | (x << (8U * sizeof(T) - k)); +} + +// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to +// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with +// care! 
+template +inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { + return reinterpret_cast(ptr); +} + +template +inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { + return reinterpret_cast(ptr); +} + +// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other +// inlinings more difficult. Throws are also generally the slow path. +template +[[noreturn]] ROBIN_HOOD(NOINLINE) +#if ROBIN_HOOD(HAS_EXCEPTIONS) + void doThrow(Args&&... args) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) + throw E(std::forward(args)...); +} +#else + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + abort(); +} +#endif + +template +T* assertNotNull(T* t, Args&&... args) { + if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { + doThrow(std::forward(args)...); + } + return t; +} + +template +inline T unaligned_load(void const* ptr) noexcept { + // using memcpy so we don't get into unaligned load problems. + // compiler should optimize this very well anyways. + T t; + std::memcpy(&t, ptr, sizeof(T)); + return t; +} + +// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, +// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a +// pointer. +template +class BulkPoolAllocator { +public: + BulkPoolAllocator() noexcept = default; + + // does not copy anything, just creates a new allocator. + BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept + : mHead(nullptr) + , mListForFree(nullptr) {} + + BulkPoolAllocator(BulkPoolAllocator&& o) noexcept + : mHead(o.mHead) + , mListForFree(o.mListForFree) { + o.mListForFree = nullptr; + o.mHead = nullptr; + } + + BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { + reset(); + mHead = o.mHead; + mListForFree = o.mListForFree; + o.mListForFree = nullptr; + o.mHead = nullptr; + return *this; + } + + BulkPoolAllocator& + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { + // does not do anything + return *this; + } + + ~BulkPoolAllocator() noexcept { + reset(); + } + + // Deallocates all allocated memory. + void reset() noexcept { + while (mListForFree) { + T* tmp = *mListForFree; + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); + mListForFree = reinterpret_cast_no_cast_align_warning(tmp); + } + mHead = nullptr; + } + + // allocates, but does NOT initialize. Use in-place new constructor, e.g. + // T* obj = pool.allocate(); + // ::new (static_cast(obj)) T(); + T* allocate() { + T* tmp = mHead; + if (!tmp) { + tmp = performAllocation(); + } + + mHead = *reinterpret_cast_no_cast_align_warning(tmp); + return tmp; + } + + // does not actually deallocate but puts it in store. + // make sure you have already called the destructor! e.g. with + // obj->~T(); + // pool.deallocate(obj); + void deallocate(T* obj) noexcept { + *reinterpret_cast_no_cast_align_warning(obj) = mHead; + mHead = obj; + } + + // Adds an already allocated block of memory to the allocator. This allocator is from now on + // responsible for freeing the data (with free()). If the provided data is not large enough to + // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
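unaligned_load above avoids casting a byte pointer to T* and instead memcpy-s into a local, which compilers lower to an ordinary load where alignment allows; hash_bytes later relies on the same trick to read 64-bit words out of arbitrary buffers. A tiny sketch of the idiom in isolation (the buffer contents are made up):

#include <cstdint>
#include <cstdio>
#include <cstring>

// memcpy-based unaligned load, as in robin_hood's unaligned_load(): copy into
// a local instead of dereferencing a misaligned pointer, so there is no
// undefined behaviour on alignment-strict platforms.
template <typename T>
T load_unaligned(const void *p) noexcept {
  T out;
  std::memcpy(&out, p, sizeof(T));  // compilers turn this into a single load
  return out;
}

int main() {
  unsigned char buf[12] = {0};
  buf[3] = 0xEF; buf[4] = 0xBE; buf[5] = 0xAD; buf[6] = 0xDE;
  // Reading a uint32_t starting at offset 3 is misaligned; memcpy makes it safe.
  std::uint32_t v = load_unaligned<std::uint32_t>(buf + 3);
  std::printf("0x%08X\n", v);  // 0xDEADBEEF on little-endian machines
  return 0;
}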
+ void addOrFree(void* ptr, const size_t numBytes) noexcept { + // calculate number of available elements in ptr + if (numBytes < ALIGNMENT + ALIGNED_SIZE) { + // not enough data for at least one element. Free and return. + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } else { + ROBIN_HOOD_LOG("add to buffer") + add(ptr, numBytes); + } + } + + void swap(BulkPoolAllocator& other) noexcept { + using std::swap; + swap(mHead, other.mHead); + swap(mListForFree, other.mListForFree); + } + +private: + // iterates the list of allocated memory to calculate how many to alloc next. + // Recalculating this each time saves us a size_t member. + // This ignores the fact that memory blocks might have been added manually with addOrFree. In + // practice, this should not matter much. + ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { + auto tmp = mListForFree; + size_t numAllocs = MinNumAllocs; + + while (numAllocs * 2 <= MaxNumAllocs && tmp) { + auto x = reinterpret_cast(tmp); + tmp = *x; + numAllocs *= 2; + } + + return numAllocs; + } + + // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). + void add(void* ptr, const size_t numBytes) noexcept { + const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; + + auto data = reinterpret_cast(ptr); + + // link free list + auto x = reinterpret_cast(data); + *x = mListForFree; + mListForFree = data; + + // create linked list for newly allocated data + auto* const headT = + reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); + + auto* const head = reinterpret_cast(headT); + + // Visual Studio compiler automatically unrolls this loop, which is pretty cool + for (size_t i = 0; i < numElements; ++i) { + *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = + head + (i + 1) * ALIGNED_SIZE; + } + + // last one points to 0 + *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = + mHead; + mHead = headT; + } + + // Called when no memory is available (mHead == 0). + // Don't inline this slow path. + ROBIN_HOOD(NOINLINE) T* performAllocation() { + size_t const numElementsToAlloc = calcNumElementsToAlloc(); + + // alloc new memory: [prev |T, T, ... T] + size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); + return mHead; + } + + // enforce byte alignment of the T's +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) + static constexpr size_t ALIGNMENT = + (std::max)(std::alignment_of::value, std::alignment_of::value); +#else + static const size_t ALIGNMENT = + (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) + ? ROBIN_HOOD_STD::alignment_of::value + : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround +#endif + + static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; + + static_assert(MinNumAllocs >= 1, "MinNumAllocs"); + static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); + static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); + static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); + static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); + + T* mHead{nullptr}; + T** mListForFree{nullptr}; +}; + +template +struct NodeAllocator; + +// dummy allocator that does nothing +template +struct NodeAllocator { + + // we are not using the data, so just free it. 
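BulkPoolAllocator's own comments spell out its contract: allocate() hands back raw storage that the caller must placement-new into, and the destructor has to be invoked by hand before the slot is returned with deallocate(). The sketch below only exercises that protocol. BulkPoolAllocator is an internal robin_hood detail, and the Widget type and the 4/256 pool bounds are illustrative assumptions rather than anything the patch defines.

#include <string>
#include "robin_hood.h"

struct Widget {
  int id;
  std::string name;
};

int main() {
  // Explicit Min/Max allocation counts, satisfying the static_asserts above.
  robin_hood::detail::BulkPoolAllocator<Widget, 4, 256> pool;

  Widget *w = pool.allocate();                     // raw, uninitialized slot
  ::new (static_cast<void *>(w)) Widget{42, "ec"}; // construct in place

  // ... use *w ...

  w->~Widget();        // destroy manually, as the header's comment requires
  pool.deallocate(w);  // the slot goes back onto the pool's free list
  return 0;            // ~BulkPoolAllocator releases the bulk blocks
}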
+ void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making +// my own here. +namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) +using std::swap; +template +struct nothrow { + static const bool value = noexcept(swap(std::declval(), std::declval())); +}; +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif +} // namespace swappable + +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = T1; + using second_type = T2; + + template ::value && + std::is_default_constructible::value>::type> + constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) + : first() + , second() {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair const& o) noexcept( + noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) + : first(o.first) + , second(o.second) {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(o.first)) + , second(std::move(o.second)) {} + + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(a)) + , second(std::move(b)) {} + + template + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) + : first(std::forward(a)) + , second(std::forward(b)) {} + + template + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) + : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()) { + } + + // constructor called from the std::piecewise_construct_t ctor + template + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) + : first(std::forward(std::get(a))...) + , second(std::forward(std::get(b))...) { + // make visual studio compiler happy about warning about unused a & b. + // Visual studio's pair implementation disables warning 4100. 
+ (void)a; + (void)b; + } + + void swap(pair& o) noexcept((detail::swappable::nothrow::value) && + (detail::swappable::nothrow::value)) { + using std::swap; + swap(first, o.first); + swap(second, o.second); + } + + T1 first; // NOLINT(misc-non-private-member-variables-in-classes) + T2 second; // NOLINT(misc-non-private-member-variables-in-classes) +}; + +template +inline void swap(pair& a, pair& b) noexcept( + noexcept(std::declval&>().swap(std::declval&>()))) { + a.swap(b); +} + +template +inline constexpr bool operator==(pair const& x, pair const& y) { + return (x.first == y.first) && (x.second == y.second); +} +template +inline constexpr bool operator!=(pair const& x, pair const& y) { + return !(x == y); +} +template +inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( + std::declval() < std::declval()) && noexcept(std::declval() < + std::declval())) { + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +inline constexpr bool operator>(pair const& x, pair const& y) { + return y < x; +} +template +inline constexpr bool operator<=(pair const& x, pair const& y) { + return !(x > y); +} +template +inline constexpr bool operator>=(pair const& x, pair const& y) { + return !(x < y); +} + +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { + static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + static constexpr uint64_t seed = UINT64_C(0xe17a1465); + static constexpr unsigned int r = 47; + + auto const* const data64 = static_cast(ptr); + uint64_t h = seed ^ (len * m); + + size_t const n_blocks = len / 8; + for (size_t i = 0; i < n_blocks; ++i) { + auto k = detail::unaligned_load(data64 + i); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + auto const* const data8 = reinterpret_cast(data64 + n_blocks); + switch (len & 7U) { + case 7: + h ^= static_cast(data8[6]) << 48U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 6: + h ^= static_cast(data8[5]) << 40U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 5: + h ^= static_cast(data8[4]) << 32U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 4: + h ^= static_cast(data8[3]) << 24U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 3: + h ^= static_cast(data8[2]) << 16U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 2: + h ^= static_cast(data8[1]) << 8U; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + case 1: + h ^= static_cast(data8[0]); + h *= m; + ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH + default: + break; + } + + h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; + return static_cast(h); +} + +inline size_t hash_int(uint64_t x) noexcept { + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); +} + +// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
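hash_bytes is the murmur-style byte hasher and hash_int the 64-bit mixer that the rest of the header builds on; robin_hood::hash<std::string>, for example, is just hash_bytes over the character data. The same primitives can be reused for project-specific keys. The BarcodeUmi type and its hasher below are purely illustrative, not something the patch defines.

#include <cstddef>
#include <cstdint>
#include "robin_hood.h"

// Illustrative key type: a barcode/UMI pair packed as two 64-bit words.
struct BarcodeUmi {
  std::uint64_t barcode;
  std::uint64_t umi;
  bool operator==(const BarcodeUmi &o) const noexcept {
    return barcode == o.barcode && umi == o.umi;
  }
};

// Reuse the header's primitives: hash_int is the same mixer robin_hood applies
// to its integral specializations (hash_bytes over the padding-free struct
// would work as well).
struct BarcodeUmiHash {
  std::size_t operator()(const BarcodeUmi &k) const noexcept {
    return robin_hood::hash_int(k.barcode) ^
           (robin_hood::hash_int(k.umi) * 0x9e3779b97f4a7c15ULL);
  }
};

int main() {
  robin_hood::unordered_flat_map<BarcodeUmi, std::uint32_t, BarcodeUmiHash> umi_counts;
  ++umi_counts[BarcodeUmi{17, 42}];  // mapped value starts value-initialized at 0
  ++umi_counts[BarcodeUmi{17, 42}];
  return umi_counts[BarcodeUmi{17, 42}] == 2 ? 0 : 1;
}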
+template +struct hash : public std::hash { + size_t operator()(T const& obj) const + noexcept(noexcept(std::declval>().operator()(std::declval()))) { + // call base hash + auto result = std::hash::operator()(obj); + // return mixed of that, to be save against identity has + return hash_int(static_cast(result)); + } +}; + +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); + } +}; + +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + +template +struct hash { + size_t operator()(T* ptr) const noexcept { + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); + } +}; + +#define ROBIN_HOOD_HASH_INT(T) \ + template <> \ + struct hash { \ + size_t operator()(T const& obj) const noexcept { \ + return hash_int(static_cast(obj)); \ + } \ + } + +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif +// see https://en.cppreference.com/w/cpp/utility/hash +ROBIN_HOOD_HASH_INT(bool); +ROBIN_HOOD_HASH_INT(char); +ROBIN_HOOD_HASH_INT(signed char); +ROBIN_HOOD_HASH_INT(unsigned char); +ROBIN_HOOD_HASH_INT(char16_t); +ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) +ROBIN_HOOD_HASH_INT(wchar_t); +#endif +ROBIN_HOOD_HASH_INT(short); +ROBIN_HOOD_HASH_INT(unsigned short); +ROBIN_HOOD_HASH_INT(int); +ROBIN_HOOD_HASH_INT(unsigned int); +ROBIN_HOOD_HASH_INT(long); +ROBIN_HOOD_HASH_INT(long long); +ROBIN_HOOD_HASH_INT(unsigned long); +ROBIN_HOOD_HASH_INT(unsigned long long); +#if defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif +namespace detail { + +template +struct void_type { + using type = void; +}; + +template +struct has_is_transparent : public std::false_type {}; + +template +struct has_is_transparent::type> + : public std::true_type {}; + +// using wrapper classes for hash and key_equal prevents the diamond problem when the same type +// is used. see https://stackoverflow.com/a/28771920/48181 +template +struct WrapHash : public T { + WrapHash() = default; + explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +template +struct WrapKeyEqual : public T { + WrapKeyEqual() = default; + explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) + : T(o) {} +}; + +// A highly optimized hashmap implementation, using the Robin Hood algorithm. +// +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but +// be about 2x faster in most cases and require much less allocations. +// +// This implementation uses the following memory layout: +// +// [Node, Node, ... Node | info, info, ... infoSentinel ] +// +// * Node: either a DataNode that directly has the std::pair as member, +// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class Table + : public WrapHash, + public WrapKeyEqual, + detail::NodeAllocator< + typename std::conditional< + std::is_void::value, Key, + robin_hood::pair::type, T>>::type, + 4, 16384, IsFlat> { +public: + static constexpr bool is_flat = IsFlat; + static constexpr bool is_map = !std::is_void::value; + static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; + + using key_type = Key; + using mapped_type = T; + using value_type = typename std::conditional< + is_set, Key, + robin_hood::pair::type, T>>::type; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = Table; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + using WHash = WrapHash; + using WKeyEqual = WrapKeyEqual; + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr uint32_t InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = uint32_t; + + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode final { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( + noexcept(value_type(std::forward(args)...))) + : mData(std::forward(args)...) 
{} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( + std::is_nothrow_move_constructible::value) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} + void destroyDoNotDeallocate() noexcept {} + + value_type const* operator->() const noexcept { + return &mData; + } + value_type* operator->() noexcept { + return &mData; + } + + const value_type& operator*() const noexcept { + return mData; + } + + value_type& operator*() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData.second; + } + + void swap(DataNode& o) noexcept( + noexcept(std::declval().swap(std::declval()))) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept + : mData(std::move(n.mData)) {} + + void destroy(M& map) noexcept { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() noexcept { + mData->~value_type(); + } + + value_type const* operator->() const noexcept { + return mData; + } + + value_type* operator->() noexcept { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData->second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData->second; + } + + void swap(DataNode& o) noexcept { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) + ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { + return n.getFirst(); + } + + // in case we have void mapped_type, we are not using a pair, thus we just route k through. + // No need to disable this because it's just not used if not applicable. 
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { + return k; + } + + // in case we have non-void mapped_type, we have a standard robin_hood::pair + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, key_type const&>::type + getFirstConst(value_type const& vt) const noexcept { + return vt.first; + } + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); + auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); + std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& s, M& t) const { + auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); + std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); + + for (size_t i = 0; i < numElementsWithBuffer; ++i) { + if (t.mInfo[i]) { + ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + struct fast_forward_tag {}; + + // generic iterator for both const_iterator and iterator. + template + // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() = default; + + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. + + // Conversion constructor from iterator to const_iterator. 
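The Cloner specializations above choose at compile time between one raw std::copy of the whole node/info buffer (when everything is trivially copyable) and per-slot copy construction guided by the info bytes. The standalone sketch below illustrates the same dispatch with C++17 if constexpr instead of the header's template specializations; it is a simplification, assigning into already-constructed elements rather than placement-new-ing into raw storage.

#include <cstddef>
#include <cstring>
#include <string>
#include <type_traits>
#include <vector>

template <typename T>
void clone_buffer(const T *src, T *dst, std::size_t n) {
  if constexpr (std::is_trivially_copyable_v<T>) {
    std::memcpy(dst, src, n * sizeof(T));  // fast path, like Cloner<M, true>
  } else {
    for (std::size_t i = 0; i < n; ++i) {  // slow path: per-element copy
      dst[i] = src[i];
    }
  }
}

int main() {
  int a[4] = {1, 2, 3, 4};
  int b[4];
  clone_buffer(a, b, 4);  // memcpy path

  std::vector<std::string> s = {"AAAC", "CTGA"};
  std::vector<std::string> t(2);
  clone_buffer(s.data(), t.data(), 2);  // element-wise path
  return (b[3] == 4 && t[1] == "CTGA") ? 0 : 1;
}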
+ template ::type> + // NOLINTNEXTLINE(hicpp-explicit-conversions) + Iter(Iter const& other) noexcept + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr, + fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) { + fastForward(); + } + + template ::type> + Iter& operator=(Iter const& other) noexcept { + mKeyVals = other.mKeyVals; + mInfo = other.mInfo; + return *this; + } + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() noexcept { + mInfo++; + mKeyVals++; + fastForward(); + return *this; + } + + Iter operator++(int) noexcept { + Iter tmp = *this; + ++(*this); + return tmp; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const noexcept { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const noexcept { + return mKeyVals != o.mKeyVals; + } + + private: + // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. + void fastForward() noexcept { + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } +#else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; +#endif + } + + friend class Table; + NodePtr mKeyVals{nullptr}; + uint8_t const* mInfo{nullptr}; + }; + + //////////////////////////////////////////////////////////////////// + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const noexcept { + *idx = *idx + 1; + *info += mInfoInc; + } + + void nextWhileLess(InfoType* info, size_t* idx) const noexcept { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. 
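keyToIdx above stretches one hash value to do two jobs: after an extra multiply-and-xorshift to guard against weak user hashes, the low InitialInfoNumBits seed the per-slot info byte and the remaining bits, masked by mMask, pick the bucket. The toy function below mirrors that split with the initial constants shown earlier (5 info bits, mInfoHashShift of 0); the multiplier is a stand-in for mHashMultiplier, and the whole thing is illustrative rather than the real member function.

#include <cstdint>
#include <cstdio>

struct IdxInfo {
  std::size_t idx;     // bucket index
  std::uint32_t info;  // seed for the probe-distance/info byte
};

IdxInfo key_to_idx_sketch(std::uint64_t h, std::size_t mask /* 2^n - 1 buckets */) {
  constexpr std::uint32_t kInitialInfoNumBits = 5;
  constexpr std::uint32_t kInitialInfoInc = 1u << kInitialInfoNumBits;  // 32
  constexpr std::uint64_t kInfoMask = kInitialInfoInc - 1u;             // 0x1f

  // Extra mixing, as keyToIdx() does, to defend against badly mixed user hashes.
  h *= 0xc4ceb9fe1a85ec53ULL;  // stand-in for mHashMultiplier
  h ^= h >> 33U;

  IdxInfo r;
  r.info = kInitialInfoInc + static_cast<std::uint32_t>(h & kInfoMask);
  r.idx = static_cast<std::size_t>(h >> kInitialInfoNumBits) & mask;
  return r;
}

int main() {
  IdxInfo r = key_to_idx_sketch(0x0123456789abcdefULL, 255);  // 256 buckets
  std::printf("idx=%zu info=%u\n", r.idx, r.info);
  return 0;
}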
+ void + shiftUp(size_t startIdx, + size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { + auto idx = startIdx; + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); + while (--idx != insertion_idx) { + mKeyVals[idx] = std::move(mKeyVals[idx - 1]); + } + + idx = startIdx; + while (idx != insertion_idx) { + ROBIN_HOOD_COUNT(shiftUp) + mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); + if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + --idx; + } + } + + void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { + // until we find one that is either empty or has zero offset. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + while (mInfo[idx + 1] >= 2 * mInfoInc) { + ROBIN_HOOD_COUNT(shiftDown) + mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[idx + 1]); + ++idx; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + ROBIN_HOOD(NODISCARD) + size_t findIdx(Other const& key) const { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask == 0 ? 0 + : static_cast(std::distance( + mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); + } + + void cloneData(const Table& o) { + Cloner()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx{}; + InfoType info{}; + keyToIdx(keyval.getFirst(), &idx, &info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = idx + 1; + info += mInfoInc; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + } + + template + Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(first, last); + } + + Table(std::initializer_list initlist, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(initlist.begin(), initlist.end()); + } + + Table(Table&& o) noexcept + : WHash(std::move(static_cast(o))) + , WKeyEqual(std::move(static_cast(o))) + , DataPool(std::move(static_cast(o))) { + ROBIN_HOOD_TRACE(this) + if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + // set other's mask to 0 so its destructor won't do anything + o.init(); + } + } + + Table& operator=(Table&& o) noexcept { + ROBIN_HOOD_TRACE(this) + if (&o != this) { + if (o.mMask) { + // only move stuff if the other map actually has some data + destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + WHash::operator=(std::move(static_cast(o))); + WKeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + + o.init(); + + } else { + // nothing in the other map => just clear us. + clear(); + } + } + return *this; + } + + Table(const Table& o) + : WHash(static_cast(o)) + , WKeyEqual(static_cast(o)) + , DataPool(static_cast(o)) { + ROBIN_HOOD_TRACE(this) + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. 
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + Table& operator=(Table const& o) { + ROBIN_HOOD_TRACE(this) + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + init(); + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + // sentinel is set in cloneData + } + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(Table& o) { + ROBIN_HOOD_TRACE(this) + using std::swap; + swap(o, *this); + } + + // Clears all data, without resizing. + void clear() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + // clear everything, then set the sentinel again + uint8_t const z = 0; + std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~Table() { + ROBIN_HOOD_TRACE(this) + destroy(); + } + + // Checks if both tables contain the same entries. Order is irrelevant. 
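The class comment earlier bills the table as a drop-in replacement for std::unordered_map, and the members in this region (copy/move assignment, swap, clear, operator==) are what let ordinary map code carry over unchanged. A short usage sketch follows, spelled with the underlying type rather than the u_map_ macro from Common.hpp; the gene IDs are made-up example data.

#include <cstdint>
#include <iostream>
#include <string>
#include "robin_hood.h"

int main() {
  // In the patch this is written u_map_<std::string, int32_t> via Common.hpp.
  robin_hood::unordered_flat_map<std::string, int32_t> genenames;
  genenames.reserve(3);

  genenames.insert({"ENSG00000139618", 0});
  genenames.emplace("ENSG00000157764", 1);
  genenames["ENSG00000141510"] = 2;  // operator[] default-constructs the value

  auto it = genenames.find("ENSG00000157764");
  if (it != genenames.end()) {
    std::cout << it->first << " -> " << it->second << "\n";
  }

  auto copy = genenames;  // deep copy; operator== is order-insensitive
  std::cout << "equal: " << (copy == genenames) << ", size: " << copy.size() << "\n";

  genenames.erase("ENSG00000139618");
  genenames.clear();  // drops the elements, keeps the allocation
  return 0;
}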
+ bool operator==(const Table& other) const { + ROBIN_HOOD_TRACE(this) + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + if (!has(otherEntry)) { + return false; + } + } + + return true; + } + + bool operator!=(const Table& other) const { + ROBIN_HOOD_TRACE(this) + return !operator==(other); + } + + template + typename std::enable_if::value, Q&>::type operator[](const key_type& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + typename std::enable_if::value, Q&>::type operator[](key_type&& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + + template + std::pair emplace(Args&&... args) { + ROBIN_HOOD_TRACE(this) + Node n{*this, std::forward(args)...}; + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; + } + + std::pair insert(const value_type& keyval) { + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; + } + + std::pair insert(value_type&& keyval) { + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + return 1U == count(key); + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q&>::type at(key_type const& key) { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + // Returns a reference to the value found for key. 
+ // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q const&>::type at(key_type const& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + return end(); + } + return iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + const_iterator begin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cbegin(); + } + const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + if (empty()) { + return cend(); + } + return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + + iterator end() { + ROBIN_HOOD_TRACE(this) + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + const_iterator end() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cend(); + } + const_iterator cend() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + ROBIN_HOOD_TRACE(this) + // its safe to perform const cast here + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + ROBIN_HOOD_TRACE(this) + // we assume that pos always points to a valid entry, and not end(). 
+ auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // exactly the same as reserve(c). + void rehash(size_t c) { + // forces a reserve + reserve(c, true); + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. + void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } + } + + size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return mNumElements; + } + + size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(-1); + } + + ROBIN_HOOD(NODISCARD) bool empty() const noexcept { + ROBIN_HOOD_TRACE(this) + return 0 == mNumElements; + } + + float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return MaxLoadFactor100 / 100.0F; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(size()) / static_cast(mMask + 1); + } + + ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { + ROBIN_HOOD_TRACE(this) + return mMask; + } + + ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { + if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { + return maxElements * MaxLoadFactor100 / 100; + } + + // we might be a bit inprecise, but since maxElements is quite large that doesn't matter + return (maxElements / 100) * MaxLoadFactor100; + } + + ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { + // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load + // 64bit types. 
+ return numElements + sizeof(uint64_t); + } + + ROBIN_HOOD(NODISCARD) + size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { + auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); + return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); + } + + // calculation only allowed for 2^n values + ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { +#if ROBIN_HOOD(BITNESS) == 64 + return numElements * sizeof(Node) + calcNumBytesInfo(numElements); +#else + // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. + auto const ne = static_cast(numElements); + auto const s = static_cast(sizeof(Node)); + auto const infos = static_cast(calcNumBytesInfo(numElements)); + + auto const total64 = ne * s + infos; + auto const total = static_cast(total64); + + if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { + throwOverflowError(); + } + return total; +#endif + } + +private: + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + auto it = find(e.first); + return it != end() && it->second == e.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + return find(e) != end(); + } + + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + + // reserves space for at least the specified number of elements. + // only works if numBuckets if power of two + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) + + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + // resize operation: move stuff + initData(numBuckets); + if (oldMaxElementsWithBuffer > 1) { + for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { + if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. 
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } + } + } + + ROBIN_HOOD(NOINLINE) void throwOverflowError() const { +#if ROBIN_HOOD(HAS_EXCEPTIONS) + throw std::overflow_error("robin_hood::map overflow"); +#else + abort(); +#endif + } + + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { + mNumElements = 0; + mMask = max_elements - 1; + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); + + // malloc & zero mInfo. Faster than calloc everything. + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); + + // set sentinel + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; + + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. 
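The try_emplace_impl and insertOrAssignImpl routines above back the public operator[], try_emplace, and insert_or_assign overloads. A short sketch of the observable differences at the call site (usage only, assuming the flat map alias declared at the end of the header):

    #include <cassert>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> m;

        m["x"] = 1;                            // operator[]: default-construct, then assign

        auto r1 = m.try_emplace("x", 99);      // key present: value left untouched
        assert(!r1.second && m["x"] == 1);

        auto r2 = m.insert_or_assign("x", 99); // key present: value overwritten
        assert(!r2.second && m["x"] == 99);

        auto r3 = m.try_emplace("y", 7);       // key absent: inserted
        assert(r3.second && m["y"] == 7);
        return 0;
    }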
+ template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + nextWhileLess(&info, &idx); + + // while we potentially have a match + while (info == mInfo[idx]) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + // key already exists, do NOT insert. + // see http://en.cppreference.com/w/cpp/container/unordered_map/insert + return std::make_pair(idx, InsertionState::key_found); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + continue; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = info; + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + if (idx != insertion_idx) { + shiftUp(idx, insertion_idx); + } + // put at empty spot + mInfo[insertion_idx] = static_cast(insertion_info); + ++mNumElements; + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); + } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); + } + + bool try_increase_info() { + ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements + << ", maxNumElementsAllowed=" + << calcMaxNumElementsAllowed(mMask + 1)) + if (mInfoInc <= 2) { + // need to be > 2 so that shift works (otherwise undefined behavior!) + return false; + } + // we got space left, try to make info smaller + mInfoInc = static_cast(mInfoInc >> 1U); + + // remove one bit of the hash, leaving more space for the distance info. + // This is extremely fast because we can operate on 8 bytes at once. + ++mInfoHashShift; + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + for (size_t i = 0; i < numElementsWithBuffer; i += 8) { + auto val = unaligned_load(mInfo + i); + val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); + std::memcpy(mInfo + i, &val, sizeof(val)); + } + // update sentinel, which might have been cleared out! + mInfo[numElementsWithBuffer] = 1; + + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + return true; + } + + // True if resize was possible, false otherwise + bool increase_size() { + // nothing allocated yet? just allocate InitialNumElements + if (0 == mMask) { + initData(InitialNumElements); + return true; + } + + auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + if (mNumElements < maxNumElementsAllowed && try_increase_info()) { + return true; + } + + ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" + << maxNumElementsAllowed << ", load=" + << (static_cast(mNumElements) * 100.0 / + (static_cast(mMask) + 1))) + + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case + // we have to rehash a few times + nextHashMultiplier(); + rehashPowerOfTwo(mMask + 1, true); + } else { + // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
+ rehashPowerOfTwo((mMask + 1) * 2, false); + } + return true; + } + + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); + } + + void destroy() { + if (0 == mMask) { + // don't deallocate! + return; + } + + Destroyer::value>{} + .nodesDoNotDeallocate(*this); + + // This protection against not deleting mMask shouldn't be needed as it's sufficiently + // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise + // reports a compile error: attempt to free a non-heap object 'fm' + // [-Werror=free-nonheap-object] + if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + } + + void init() noexcept { + mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); + mInfo = reinterpret_cast(&mMask); + mNumElements = 0; + mMask = 0; + mMaxNumElementsAllowed = 0; + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // members are sorted so no padding occurs + uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 + // 16 byte 56 if NodeAllocator +}; + +} // namespace detail + +// map + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_flat_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_node_map = detail::Table; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_map = + detail::Table) <= sizeof(size_t) * 6 && + std::is_nothrow_move_constructible>::value && + std::is_nothrow_move_assignable>::value, + MaxLoadFactor100, Key, T, Hash, KeyEqual>; + +// set + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_flat_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_node_set = detail::Table; + +template , typename KeyEqual = std::equal_to, + size_t MaxLoadFactor100 = 80> +using unordered_set = detail::Table::value && + std::is_nothrow_move_assignable::value, + MaxLoadFactor100, Key, void, Hash, KeyEqual>; + +} // namespace robin_hood + +#endif From 82959c039487deed0e49a6568d37f2b1d3d835b7 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:31:03 -0800 Subject: [PATCH 22/49] back to unordered map --- src/Common.hpp | 3 +- src/robin_hood.h | 2544 ---------------------------------------------- 2 files changed, 1 insertion(+), 2546 deletions(-) delete mode 100644 src/robin_hood.h diff --git a/src/Common.hpp b/src/Common.hpp index 4cc8596..4b290a3 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -9,13 +9,12 @@ #include #include #include -#include "robin_hood.h" #include "roaring.hh" #include "hash.hpp" #define BUSTOOLS_VERSION "0.42.0" -#define u_map_ robin_hood::unordered_flat_map +#define u_map_ std::unordered_map enum CAPTURE_TYPE : char { CAPTURE_NONE = 0, diff --git a/src/robin_hood.h 
b/src/robin_hood.h deleted file mode 100644 index 0af031f..0000000 --- a/src/robin_hood.h +++ /dev/null @@ -1,2544 +0,0 @@ -// ______ _____ ______ _________ -// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / -// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / -// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / -// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ -// _/_____/ -// -// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// https://github.com/martinus/robin-hood-hashing -// -// Licensed under the MIT License . -// SPDX-License-Identifier: MIT -// Copyright (c) 2018-2021 Martin Ankerl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#ifndef ROBIN_HOOD_H_INCLUDED -#define ROBIN_HOOD_H_INCLUDED - -// see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes - -#include -#include -#include -#include -#include -#include // only to support hash of smart pointers -#include -#include -#include -#include -#if __cplusplus >= 201703L -# include -#endif - -// #define ROBIN_HOOD_LOG_ENABLED -#ifdef ROBIN_HOOD_LOG_ENABLED -# include -# define ROBIN_HOOD_LOG(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_LOG(x) -#endif - -// #define ROBIN_HOOD_TRACE_ENABLED -#ifdef ROBIN_HOOD_TRACE_ENABLED -# include -# define ROBIN_HOOD_TRACE(...) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; -#else -# define ROBIN_HOOD_TRACE(x) -#endif - -// #define ROBIN_HOOD_COUNT_ENABLED -#ifdef ROBIN_HOOD_COUNT_ENABLED -# include -# define ROBIN_HOOD_COUNT(x) ++counts().x; -namespace robin_hood { -struct Counts { - uint64_t shiftUp{}; - uint64_t shiftDown{}; -}; -inline std::ostream& operator<<(std::ostream& os, Counts const& c) { - return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; -} - -static Counts& counts() { - static Counts counts{}; - return counts; -} -} // namespace robin_hood -#else -# define ROBIN_HOOD_COUNT(x) -#endif - -// all non-argument macros should use this facility. 
See -// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ -#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() - -// mark unused members with this macro -#define ROBIN_HOOD_UNUSED(identifier) - -// bitness -#if SIZE_MAX == UINT32_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 -#elif SIZE_MAX == UINT64_MAX -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 -#else -# error Unsupported bitness -#endif - -// endianess -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#endif - -// inline -#ifdef _MSC_VER -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) -#endif - -// exceptions -#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 -#endif - -// count leading/trailing bits -#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) -# ifdef _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -# else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll -# endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) -# endif -#endif - -// fallthrough -#ifndef __has_cpp_attribute // For backwards compatibility -# define __has_cpp_attribute(x) 0 -#endif -#if __has_cpp_attribute(clang::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() -#endif - -// likely/unlikely -#ifdef _MSC_VER -# define ROBIN_HOOD_LIKELY(condition) condition -# define ROBIN_HOOD_UNLIKELY(condition) condition -#else -# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) -# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) -#endif - -// detect if native wchar_t type is availiable in MSVC -#ifdef _MSC_VER -# ifdef _NATIVE_WCHAR_T_DEFINED -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 -#endif - -// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr -#ifdef _MSC_VER -# if _MSC_VER <= 1900 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -# endif -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 -#endif - -// workaround missing "is_trivially_copyable" in g++ < 5.0 -// See https://stackoverflow.com/a/31798726/48181 -#if defined(__GNUC__) && __GNUC__ < 5 -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) -#else -# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value -#endif - -// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L -#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] -#else -# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() -#endif - -namespace robin_hood { - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) -# define ROBIN_HOOD_STD std -#else - -// c++11 compatibility layer -namespace ROBIN_HOOD_STD { -template -struct alignment_of - : std::integral_constant::type)> {}; - -template -class integer_sequence { -public: - using value_type = T; - static_assert(std::is_integral::value, "not integral type"); - static constexpr std::size_t size() noexcept { - return sizeof...(Ints); - } -}; -template -using index_sequence = integer_sequence; - -namespace detail_ { -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); - - template - struct IntSeqCombiner; - - template - struct IntSeqCombiner, integer_sequence> { - using TResult = integer_sequence; - }; - - using TResult = - typename IntSeqCombiner::TResult, - typename IntSeqImpl::TResult>::TResult; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; - -template -struct IntSeqImpl { - using TValue = T; - static_assert(std::is_integral::value, "not integral type"); - static_assert(Begin >= 0, "unexpected argument (Begin<0)"); - using TResult = integer_sequence; -}; -} // namespace detail_ - -template -using make_integer_sequence = typename detail_::IntSeqImpl::TResult; - -template -using make_index_sequence = make_integer_sequence; - -template -using index_sequence_for = make_index_sequence; - -} // namespace ROBIN_HOOD_STD - -#endif - -namespace detail { - -// make sure we static_cast to the correct type for hash_int -#if ROBIN_HOOD(BITNESS) == 64 -using SizeT = uint64_t; -#else -using SizeT = uint32_t; -#endif - -template -T rotr(T x, unsigned k) { - return (x >> k) | (x << (8U * sizeof(T) - k)); -} - -// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to -// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with -// care! 
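The ROBIN_HOOD(x) dispatcher defined earlier in this header keeps every configuration flag behind a single token-pasting macro. A tiny standalone sketch of the same pattern, using a hypothetical MYLIB prefix rather than anything from this header:

    #include <cstdio>

    // Each flag is defined once as a private function-like macro, and MYLIB(x)
    // pastes the flag name onto that prefix, mirroring ROBIN_HOOD(x).
    #define MYLIB(x) MYLIB_PRIVATE_DEFINITION_##x()
    #define MYLIB_PRIVATE_DEFINITION_BITNESS() 64

    int main() {
    #if MYLIB(BITNESS) == 64
        std::puts("64-bit build");
    #endif
        return 0;
    }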
-template -inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { - return reinterpret_cast(ptr); -} - -template -inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { - return reinterpret_cast(ptr); -} - -// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other -// inlinings more difficult. Throws are also generally the slow path. -template -[[noreturn]] ROBIN_HOOD(NOINLINE) -#if ROBIN_HOOD(HAS_EXCEPTIONS) - void doThrow(Args&&... args) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) - throw E(std::forward(args)...); -} -#else - void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { - abort(); -} -#endif - -template -T* assertNotNull(T* t, Args&&... args) { - if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { - doThrow(std::forward(args)...); - } - return t; -} - -template -inline T unaligned_load(void const* ptr) noexcept { - // using memcpy so we don't get into unaligned load problems. - // compiler should optimize this very well anyways. - T t; - std::memcpy(&t, ptr, sizeof(T)); - return t; -} - -// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, -// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a -// pointer. -template -class BulkPoolAllocator { -public: - BulkPoolAllocator() noexcept = default; - - // does not copy anything, just creates a new allocator. - BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept - : mHead(nullptr) - , mListForFree(nullptr) {} - - BulkPoolAllocator(BulkPoolAllocator&& o) noexcept - : mHead(o.mHead) - , mListForFree(o.mListForFree) { - o.mListForFree = nullptr; - o.mHead = nullptr; - } - - BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { - reset(); - mHead = o.mHead; - mListForFree = o.mListForFree; - o.mListForFree = nullptr; - o.mHead = nullptr; - return *this; - } - - BulkPoolAllocator& - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { - // does not do anything - return *this; - } - - ~BulkPoolAllocator() noexcept { - reset(); - } - - // Deallocates all allocated memory. - void reset() noexcept { - while (mListForFree) { - T* tmp = *mListForFree; - ROBIN_HOOD_LOG("std::free") - std::free(mListForFree); - mListForFree = reinterpret_cast_no_cast_align_warning(tmp); - } - mHead = nullptr; - } - - // allocates, but does NOT initialize. Use in-place new constructor, e.g. - // T* obj = pool.allocate(); - // ::new (static_cast(obj)) T(); - T* allocate() { - T* tmp = mHead; - if (!tmp) { - tmp = performAllocation(); - } - - mHead = *reinterpret_cast_no_cast_align_warning(tmp); - return tmp; - } - - // does not actually deallocate but puts it in store. - // make sure you have already called the destructor! e.g. with - // obj->~T(); - // pool.deallocate(obj); - void deallocate(T* obj) noexcept { - *reinterpret_cast_no_cast_align_warning(obj) = mHead; - mHead = obj; - } - - // Adds an already allocated block of memory to the allocator. This allocator is from now on - // responsible for freeing the data (with free()). If the provided data is not large enough to - // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. 
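The allocate()/deallocate() contract documented above (raw storage handed out, destructor already run before storage is handed back) is easy to get wrong. A minimal sketch, assuming robin_hood.h is included; BulkPoolAllocator lives in the internal robin_hood::detail namespace and is shown here only for illustration, with Widget as a made-up payload type:

    #include <new>
    #include "robin_hood.h"

    struct Widget { int id; };

    int main() {
        robin_hood::detail::BulkPoolAllocator<Widget, 4, 256> pool;

        Widget* w = pool.allocate();                 // raw, uninitialized storage
        ::new (static_cast<void*>(w)) Widget{42};    // construct in place

        w->~Widget();                                // destroy first...
        pool.deallocate(w);                          // ...then return storage to the free list
        return 0;
    }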
- void addOrFree(void* ptr, const size_t numBytes) noexcept { - // calculate number of available elements in ptr - if (numBytes < ALIGNMENT + ALIGNED_SIZE) { - // not enough data for at least one element. Free and return. - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } else { - ROBIN_HOOD_LOG("add to buffer") - add(ptr, numBytes); - } - } - - void swap(BulkPoolAllocator& other) noexcept { - using std::swap; - swap(mHead, other.mHead); - swap(mListForFree, other.mListForFree); - } - -private: - // iterates the list of allocated memory to calculate how many to alloc next. - // Recalculating this each time saves us a size_t member. - // This ignores the fact that memory blocks might have been added manually with addOrFree. In - // practice, this should not matter much. - ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { - auto tmp = mListForFree; - size_t numAllocs = MinNumAllocs; - - while (numAllocs * 2 <= MaxNumAllocs && tmp) { - auto x = reinterpret_cast(tmp); - tmp = *x; - numAllocs *= 2; - } - - return numAllocs; - } - - // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). - void add(void* ptr, const size_t numBytes) noexcept { - const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; - - auto data = reinterpret_cast(ptr); - - // link free list - auto x = reinterpret_cast(data); - *x = mListForFree; - mListForFree = data; - - // create linked list for newly allocated data - auto* const headT = - reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); - - auto* const head = reinterpret_cast(headT); - - // Visual Studio compiler automatically unrolls this loop, which is pretty cool - for (size_t i = 0; i < numElements; ++i) { - *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = - head + (i + 1) * ALIGNED_SIZE; - } - - // last one points to 0 - *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = - mHead; - mHead = headT; - } - - // Called when no memory is available (mHead == 0). - // Don't inline this slow path. - ROBIN_HOOD(NOINLINE) T* performAllocation() { - size_t const numElementsToAlloc = calcNumElementsToAlloc(); - - // alloc new memory: [prev |T, T, ... T] - size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE - << " * " << numElementsToAlloc) - add(assertNotNull(std::malloc(bytes)), bytes); - return mHead; - } - - // enforce byte alignment of the T's -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) - static constexpr size_t ALIGNMENT = - (std::max)(std::alignment_of::value, std::alignment_of::value); -#else - static const size_t ALIGNMENT = - (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) - ? ROBIN_HOOD_STD::alignment_of::value - : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround -#endif - - static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; - - static_assert(MinNumAllocs >= 1, "MinNumAllocs"); - static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); - static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); - static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); - static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); - - T* mHead{nullptr}; - T** mListForFree{nullptr}; -}; - -template -struct NodeAllocator; - -// dummy allocator that does nothing -template -struct NodeAllocator { - - // we are not using the data, so just free it. 
- void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - ROBIN_HOOD_LOG("std::free") - std::free(ptr); - } -}; - -template -struct NodeAllocator : public BulkPoolAllocator {}; - -// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making -// my own here. -namespace swappable { -#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) -using std::swap; -template -struct nothrow { - static const bool value = noexcept(swap(std::declval(), std::declval())); -}; -#else -template -struct nothrow { - static const bool value = std::is_nothrow_swappable::value; -}; -#endif -} // namespace swappable - -} // namespace detail - -struct is_transparent_tag {}; - -// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, -// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is -// also tested. -template -struct pair { - using first_type = T1; - using second_type = T2; - - template ::value && - std::is_default_constructible::value>::type> - constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) - : first() - , second() {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair const& o) noexcept( - noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) - : first(o.first) - , second(o.second) {} - - // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair&& o) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(o.first)) - , second(std::move(o.second)) {} - - constexpr pair(T1&& a, T2&& b) noexcept(noexcept( - T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) - : first(std::move(a)) - , second(std::move(b)) {} - - template - constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( - std::declval()))) && noexcept(T2(std::forward(std::declval())))) - : first(std::forward(a)) - , second(std::forward(b)) {} - - template - // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" - // if this constructor is constexpr -#if !ROBIN_HOOD(BROKEN_CONSTEXPR) - constexpr -#endif - pair(std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple - b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) - : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) { - } - - // constructor called from the std::piecewise_construct_t ctor - template - pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( - noexcept(T1(std::forward(std::get( - std::declval&>()))...)) && noexcept(T2(std:: - forward(std::get( - std::declval&>()))...))) - : first(std::forward(std::get(a))...) - , second(std::forward(std::get(b))...) { - // make visual studio compiler happy about warning about unused a & b. - // Visual studio's pair implementation disables warning 4100. 
- (void)a; - (void)b; - } - - void swap(pair& o) noexcept((detail::swappable::nothrow::value) && - (detail::swappable::nothrow::value)) { - using std::swap; - swap(first, o.first); - swap(second, o.second); - } - - T1 first; // NOLINT(misc-non-private-member-variables-in-classes) - T2 second; // NOLINT(misc-non-private-member-variables-in-classes) -}; - -template -inline void swap(pair& a, pair& b) noexcept( - noexcept(std::declval&>().swap(std::declval&>()))) { - a.swap(b); -} - -template -inline constexpr bool operator==(pair const& x, pair const& y) { - return (x.first == y.first) && (x.second == y.second); -} -template -inline constexpr bool operator!=(pair const& x, pair const& y) { - return !(x == y); -} -template -inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( - std::declval() < std::declval()) && noexcept(std::declval() < - std::declval())) { - return x.first < y.first || (!(y.first < x.first) && x.second < y.second); -} -template -inline constexpr bool operator>(pair const& x, pair const& y) { - return y < x; -} -template -inline constexpr bool operator<=(pair const& x, pair const& y) { - return !(x > y); -} -template -inline constexpr bool operator>=(pair const& x, pair const& y) { - return !(x < y); -} - -inline size_t hash_bytes(void const* ptr, size_t len) noexcept { - static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - static constexpr uint64_t seed = UINT64_C(0xe17a1465); - static constexpr unsigned int r = 47; - - auto const* const data64 = static_cast(ptr); - uint64_t h = seed ^ (len * m); - - size_t const n_blocks = len / 8; - for (size_t i = 0; i < n_blocks; ++i) { - auto k = detail::unaligned_load(data64 + i); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - auto const* const data8 = reinterpret_cast(data64 + n_blocks); - switch (len & 7U) { - case 7: - h ^= static_cast(data8[6]) << 48U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 6: - h ^= static_cast(data8[5]) << 40U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 5: - h ^= static_cast(data8[4]) << 32U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 4: - h ^= static_cast(data8[3]) << 24U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 3: - h ^= static_cast(data8[2]) << 16U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 2: - h ^= static_cast(data8[1]) << 8U; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - case 1: - h ^= static_cast(data8[0]); - h *= m; - ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH - default: - break; - } - - h ^= h >> r; - - // not doing the final step here, because this will be done by keyToIdx anyways - // h *= m; - // h ^= h >> r; - return static_cast(h); -} - -inline size_t hash_int(uint64_t x) noexcept { - // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, - // and doesn't need any special 128bit operations. - x ^= x >> 33U; - x *= UINT64_C(0xff51afd7ed558ccd); - x ^= x >> 33U; - - // not doing the final step here, because this will be done by keyToIdx anyways - // x *= UINT64_C(0xc4ceb9fe1a85ec53); - // x ^= x >> 33U; - return static_cast(x); -} - -// A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
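hash_bytes() and hash_int() above are plain free functions, and the robin_hood::hash wrapper defined next layers this mixing step over std::hash. A short sketch of calling them directly (usage only, assuming robin_hood.h is included):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        size_t h1 = robin_hood::hash_int(UINT64_C(12345));       // murmur-style integer finalizer

        std::string s = "barcode";
        size_t h2 = robin_hood::hash_bytes(s.data(), s.size());  // byte-range hash (string specialization)

        size_t h3 = robin_hood::hash<std::string>{}(s);          // functor form used by the map

        std::printf("%zu %zu %zu\n", h1, h2, h3);
        return 0;
    }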
-template -struct hash : public std::hash { - size_t operator()(T const& obj) const - noexcept(noexcept(std::declval>().operator()(std::declval()))) { - // call base hash - auto result = std::hash::operator()(obj); - // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); - } -}; - -template -struct hash> { - size_t operator()(std::basic_string const& str) const noexcept { - return hash_bytes(str.data(), sizeof(CharT) * str.size()); - } -}; - -#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) -template -struct hash> { - size_t operator()(std::basic_string_view const& sv) const noexcept { - return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); - } -}; -#endif - -template -struct hash { - size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); - } -}; - -template -struct hash> { - size_t operator()(std::unique_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash> { - size_t operator()(std::shared_ptr const& ptr) const noexcept { - return hash_int(reinterpret_cast(ptr.get())); - } -}; - -template -struct hash::value>::type> { - size_t operator()(Enum e) const noexcept { - using Underlying = typename std::underlying_type::type; - return hash{}(static_cast(e)); - } -}; - -#define ROBIN_HOOD_HASH_INT(T) \ - template <> \ - struct hash { \ - size_t operator()(T const& obj) const noexcept { \ - return hash_int(static_cast(obj)); \ - } \ - } - -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuseless-cast" -#endif -// see https://en.cppreference.com/w/cpp/utility/hash -ROBIN_HOOD_HASH_INT(bool); -ROBIN_HOOD_HASH_INT(char); -ROBIN_HOOD_HASH_INT(signed char); -ROBIN_HOOD_HASH_INT(unsigned char); -ROBIN_HOOD_HASH_INT(char16_t); -ROBIN_HOOD_HASH_INT(char32_t); -#if ROBIN_HOOD(HAS_NATIVE_WCHART) -ROBIN_HOOD_HASH_INT(wchar_t); -#endif -ROBIN_HOOD_HASH_INT(short); -ROBIN_HOOD_HASH_INT(unsigned short); -ROBIN_HOOD_HASH_INT(int); -ROBIN_HOOD_HASH_INT(unsigned int); -ROBIN_HOOD_HASH_INT(long); -ROBIN_HOOD_HASH_INT(long long); -ROBIN_HOOD_HASH_INT(unsigned long); -ROBIN_HOOD_HASH_INT(unsigned long long); -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif -namespace detail { - -template -struct void_type { - using type = void; -}; - -template -struct has_is_transparent : public std::false_type {}; - -template -struct has_is_transparent::type> - : public std::true_type {}; - -// using wrapper classes for hash and key_equal prevents the diamond problem when the same type -// is used. see https://stackoverflow.com/a/28771920/48181 -template -struct WrapHash : public T { - WrapHash() = default; - explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -template -struct WrapKeyEqual : public T { - WrapKeyEqual() = default; - explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) - : T(o) {} -}; - -// A highly optimized hashmap implementation, using the Robin Hood algorithm. -// -// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but -// be about 2x faster in most cases and require much less allocations. -// -// This implementation uses the following memory layout: -// -// [Node, Node, ... Node | info, info, ... infoSentinel ] -// -// * Node: either a DataNode that directly has the std::pair as member, -// or a DataNode with a pointer to std::pair. 
Which DataNode representation to use -// depends on how fast the swap() operation is. Heuristically, this is automatically choosen -// based on sizeof(). there are always 2^n Nodes. -// -// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. -// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the -// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it -// actually belongs to the previous position and was pushed out because that place is already -// taken. -// -// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the -// need for a idx variable. -// -// According to STL, order of templates has effect on throughput. That's why I've moved the -// boolean to the front. -// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ -template -class Table - : public WrapHash, - public WrapKeyEqual, - detail::NodeAllocator< - typename std::conditional< - std::is_void::value, Key, - robin_hood::pair::type, T>>::type, - 4, 16384, IsFlat> { -public: - static constexpr bool is_flat = IsFlat; - static constexpr bool is_map = !std::is_void::value; - static constexpr bool is_set = !is_map; - static constexpr bool is_transparent = - has_is_transparent::value && has_is_transparent::value; - - using key_type = Key; - using mapped_type = T; - using value_type = typename std::conditional< - is_set, Key, - robin_hood::pair::type, T>>::type; - using size_type = size_t; - using hasher = Hash; - using key_equal = KeyEqual; - using Self = Table; - -private: - static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, - "MaxLoadFactor100 needs to be >10 && < 100"); - - using WHash = WrapHash; - using WKeyEqual = WrapKeyEqual; - - // configuration defaults - - // make sure we have 8 elements, needed to quickly rehash mInfo - static constexpr size_t InitialNumElements = sizeof(uint64_t); - static constexpr uint32_t InitialInfoNumBits = 5; - static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr size_t InfoMask = InitialInfoInc - 1U; - static constexpr uint8_t InitialInfoHashShift = 0; - using DataPool = detail::NodeAllocator; - - // type needs to be wider than uint8_t. - using InfoType = uint32_t; - - // DataNode //////////////////////////////////////////////////////// - - // Primary template for the data node. We have special implementations for small and big - // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these - // on the heap so swap merely swaps a pointer. - template - class DataNode {}; - - // Small: just allocate on the stack. - template - class DataNode final { - public: - template - explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( - noexcept(value_type(std::forward(args)...))) - : mData(std::forward(args)...) 
{} - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( - std::is_nothrow_move_constructible::value) - : mData(std::move(n.mData)) {} - - // doesn't do anything - void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} - void destroyDoNotDeallocate() noexcept {} - - value_type const* operator->() const noexcept { - return &mData; - } - value_type* operator->() noexcept { - return &mData; - } - - const value_type& operator*() const noexcept { - return mData; - } - - value_type& operator*() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData.first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData.second; - } - - void swap(DataNode& o) noexcept( - noexcept(std::declval().swap(std::declval()))) { - mData.swap(o.mData); - } - - private: - value_type mData; - }; - - // big object: allocate on heap. - template - class DataNode { - public: - template - explicit DataNode(M& map, Args&&... args) - : mData(map.allocate()) { - ::new (static_cast(mData)) value_type(std::forward(args)...); - } - - DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept - : mData(std::move(n.mData)) {} - - void destroy(M& map) noexcept { - // don't deallocate, just put it into list of datapool. - mData->~value_type(); - map.deallocate(mData); - } - - void destroyDoNotDeallocate() noexcept { - mData->~value_type(); - } - - value_type const* operator->() const noexcept { - return mData; - } - - value_type* operator->() noexcept { - return mData; - } - - const value_type& operator*() const { - return *mData; - } - - value_type& operator*() { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type - getFirst() const noexcept { - return mData->first; - } - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const noexcept { - return *mData; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() noexcept { - return mData->second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getSecond() const noexcept { - return mData->second; - } - - void swap(DataNode& o) noexcept { - using std::swap; - swap(mData, o.mData); - } - - private: - value_type* mData; - }; - - using Node = DataNode; - - // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) - ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { - return n.getFirst(); - } - - // in case we have void mapped_type, we are not using a pair, thus we just route k through. - // No need to disable this because it's just not used if not applicable. 
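The two DataNode specializations above are what separate the flat (in-table) and node (heap-allocated) variants selected by the aliases at the end of the header. A small sketch of choosing between them, where BigValue is just an illustrative type large enough that the auto-selecting robin_hood::unordered_map falls back to node storage:

    #include <array>
    #include <string>
    #include "robin_hood.h"

    struct BigValue {
        std::array<char, 256> payload{};
    };

    int main() {
        // values stored directly in the table; fastest, but element addresses
        // are not stable across a rehash
        robin_hood::unordered_flat_map<std::string, int> flat;

        // values allocated from the node pool; rehashing only shuffles pointers
        robin_hood::unordered_node_map<std::string, BigValue> nodes;

        // picks flat or node storage from sizeof(pair) and nothrow-movability
        robin_hood::unordered_map<std::string, BigValue> automatic;

        flat["small"] = 1;
        nodes["big"] = BigValue{};
        automatic["auto"] = BigValue{};
        return 0;
    }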
- ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { - return k; - } - - // in case we have non-void mapped_type, we have a standard robin_hood::pair - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, key_type const&>::type - getFirstConst(value_type const& vt) const noexcept { - return vt.first; - } - - // Cloner ////////////////////////////////////////////////////////// - - template - struct Cloner; - - // fast path: Just copy data, without allocating anything. - template - struct Cloner { - void operator()(M const& source, M& target) const { - auto const* const src = reinterpret_cast(source.mKeyVals); - auto* tgt = reinterpret_cast(target.mKeyVals); - auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); - std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); - } - }; - - template - struct Cloner { - void operator()(M const& s, M& t) const { - auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); - std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); - - for (size_t i = 0; i < numElementsWithBuffer; ++i) { - if (t.mInfo[i]) { - ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); - } - } - } - }; - - // Destroyer /////////////////////////////////////////////////////// - - template - struct Destroyer {}; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - } - }; - - template - struct Destroyer { - void nodes(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroy(m); - n.~Node(); - } - } - } - - void nodesDoNotDeallocate(M& m) const noexcept { - m.mNumElements = 0; - // clear also resets mInfo to 0, that's sometimes not necessary. - auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); - for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { - if (0 != m.mInfo[idx]) { - Node& n = m.mKeyVals[idx]; - n.destroyDoNotDeallocate(); - n.~Node(); - } - } - } - }; - - // Iter //////////////////////////////////////////////////////////// - - struct fast_forward_tag {}; - - // generic iterator for both const_iterator and iterator. - template - // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) - class Iter { - private: - using NodePtr = typename std::conditional::type; - - public: - using difference_type = std::ptrdiff_t; - using value_type = typename Self::value_type; - using reference = typename std::conditional::type; - using pointer = typename std::conditional::type; - using iterator_category = std::forward_iterator_tag; - - // default constructed iterator can be compared to itself, but WON'T return true when - // compared to end(). - Iter() = default; - - // Rule of zero: nothing specified. The conversion constructor is only enabled for - // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. - - // Conversion constructor from iterator to const_iterator. 
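The Iter class being defined here is a plain forward iterator over the occupied slots. A short usage sketch of iteration and lookup, assuming the flat map alias from the end of the header:

    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> m;
        m.insert({{"a", 1}, {"b", 2}});

        // forward iteration only; visiting order is unspecified
        for (auto const& kv : m) {
            std::printf("%s -> %d\n", kv.first.c_str(), kv.second);
        }

        // find() returns end() when the key is absent
        auto it = m.find("c");
        return it == m.end() ? 0 : 1;
    }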
- template ::type> - // NOLINTNEXTLINE(hicpp-explicit-conversions) - Iter(Iter const& other) noexcept - : mKeyVals(other.mKeyVals) - , mInfo(other.mInfo) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) {} - - Iter(NodePtr valPtr, uint8_t const* infoPtr, - fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept - : mKeyVals(valPtr) - , mInfo(infoPtr) { - fastForward(); - } - - template ::type> - Iter& operator=(Iter const& other) noexcept { - mKeyVals = other.mKeyVals; - mInfo = other.mInfo; - return *this; - } - - // prefix increment. Undefined behavior if we are at end()! - Iter& operator++() noexcept { - mInfo++; - mKeyVals++; - fastForward(); - return *this; - } - - Iter operator++(int) noexcept { - Iter tmp = *this; - ++(*this); - return tmp; - } - - reference operator*() const { - return **mKeyVals; - } - - pointer operator->() const { - return &**mKeyVals; - } - - template - bool operator==(Iter const& o) const noexcept { - return mKeyVals == o.mKeyVals; - } - - template - bool operator!=(Iter const& o) const noexcept { - return mKeyVals != o.mKeyVals; - } - - private: - // fast forward to the next non-free info byte - // I've tried a few variants that don't depend on intrinsics, but unfortunately they are - // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. - void fastForward() noexcept { - size_t n = 0; - while (0U == (n = detail::unaligned_load(mInfo))) { - mInfo += sizeof(size_t); - mKeyVals += sizeof(size_t); - } -#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) - // we know for certain that within the next 8 bytes we'll find a non-zero one. - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 4; - mKeyVals += 4; - } - if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { - mInfo += 2; - mKeyVals += 2; - } - if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { - mInfo += 1; - mKeyVals += 1; - } -#else -# if ROBIN_HOOD(LITTLE_ENDIAN) - auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; -# else - auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; -# endif - mInfo += inc; - mKeyVals += inc; -#endif - } - - friend class Table; - NodePtr mKeyVals{nullptr}; - uint8_t const* mInfo{nullptr}; - }; - - //////////////////////////////////////////////////////////////////// - - // highly performance relevant code. - // Lower bits are used for indexing into the array (2^n size) - // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. - template - void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // In addition to whatever hash is used, add another mul & shift so we get better hashing. - // This serves as a bad hash prevention, if the given data is - // badly mixed. - auto h = static_cast(WHash::operator()(key)); - - h *= mHashMultiplier; - h ^= h >> 33U; - - // the lower InitialInfoNumBits are reserved for info. - *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); - *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; - } - - // forwards the index by one, wrapping around at the end - void next(InfoType* info, size_t* idx) const noexcept { - *idx = *idx + 1; - *info += mInfoInc; - } - - void nextWhileLess(InfoType* info, size_t* idx) const noexcept { - // unrolling this by hand did not bring any speedups. - while (*info < mInfo[*idx]) { - next(info, idx); - } - } - - // Shift everything up by one element. Tries to move stuff around. 
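// [Editorial aside — a minimal sketch, not part of the patch] keyToIdx above re-mixes the hash
// with mHashMultiplier and an xor-shift, then splits it: the lowest InitialInfoNumBits seed the
// per-bucket "info" byte and the remaining bits, masked by mMask, pick the bucket. A simplified
// stand-alone version; the 5-bit info width and the "+ 1" offset (so that 0 can mean "empty")
// are illustrative assumptions, not the library's exact configuration:
#include <cstddef>
#include <cstdint>

void key_to_idx(uint64_t h, std::size_t mask, std::size_t* idx, uint8_t* info) {
    h *= UINT64_C(0xc4ceb9fe1a85ec53);                              // extra mixing, as with mHashMultiplier
    h ^= h >> 33U;
    const unsigned info_bits = 5;                                   // assumed InitialInfoNumBits
    *info = 1 + static_cast<uint8_t>(h & ((1U << info_bits) - 1));  // low bits -> info byte
    *idx = (static_cast<std::size_t>(h) >> info_bits) & mask;       // remaining bits -> bucket index
}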
- void - shiftUp(size_t startIdx, - size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { - auto idx = startIdx; - ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); - while (--idx != insertion_idx) { - mKeyVals[idx] = std::move(mKeyVals[idx - 1]); - } - - idx = startIdx; - while (idx != insertion_idx) { - ROBIN_HOOD_COUNT(shiftUp) - mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); - if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - --idx; - } - } - - void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { - // until we find one that is either empty or has zero offset. - // TODO(martinus) we don't need to move everything, just the last one for the same - // bucket. - mKeyVals[idx].destroy(*this); - - // until we find one that is either empty or has zero offset. - while (mInfo[idx + 1] >= 2 * mInfoInc) { - ROBIN_HOOD_COUNT(shiftDown) - mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); - mKeyVals[idx] = std::move(mKeyVals[idx + 1]); - ++idx; - } - - mInfo[idx] = 0; - // don't destroy, we've moved it - // mKeyVals[idx].destroy(*this); - mKeyVals[idx].~Node(); - } - - // copy of find(), except that it returns iterator instead of const_iterator. - template - ROBIN_HOOD(NODISCARD) - size_t findIdx(Other const& key) const { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - do { - // unrolling this twice gives a bit of a speedup. More unrolling did not help. - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - if (info == mInfo[idx] && - ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { - return idx; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found! - return mMask == 0 ? 0 - : static_cast(std::distance( - mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); - } - - void cloneData(const Table& o) { - Cloner()(o, *this); - } - - // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return True on success, false if something went wrong - void insert_move(Node&& keyval) { - // we don't retry, fail if overflowing - // don't need to check max num elements - if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); - } - - size_t idx{}; - InfoType info{}; - keyToIdx(keyval.getFirst(), &idx, &info); - - // skip forward. Use <= because we are certain that the element is not there. - while (info <= mInfo[idx]) { - idx = idx + 1; - info += mInfoInc; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = static_cast(info); - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(std::move(keyval)); - } else { - shiftUp(idx, insertion_idx); - l = std::move(keyval); - } - - // put at empty spot - mInfo[insertion_idx] = insertion_info; - - ++mNumElements; - } - -public: - using iterator = Iter; - using const_iterator = Iter; - - Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) - : WHash() - , WKeyEqual() { - ROBIN_HOOD_TRACE(this) - } - - // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
- // This tremendously speeds up ctor & dtor of a map that never receives an element. The - // penalty is payed at the first insert, and not before. Lookup of this empty map works - // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the - // standard, but we can ignore it. - explicit Table( - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - } - - template - Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, - const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(first, last); - } - - Table(std::initializer_list initlist, - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) - : WHash(h) - , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this) - insert(initlist.begin(), initlist.end()); - } - - Table(Table&& o) noexcept - : WHash(std::move(static_cast(o))) - , WKeyEqual(std::move(static_cast(o))) - , DataPool(std::move(static_cast(o))) { - ROBIN_HOOD_TRACE(this) - if (o.mMask) { - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - // set other's mask to 0 so its destructor won't do anything - o.init(); - } - } - - Table& operator=(Table&& o) noexcept { - ROBIN_HOOD_TRACE(this) - if (&o != this) { - if (o.mMask) { - // only move stuff if the other map actually has some data - destroy(); - mHashMultiplier = std::move(o.mHashMultiplier); - mKeyVals = std::move(o.mKeyVals); - mInfo = std::move(o.mInfo); - mNumElements = std::move(o.mNumElements); - mMask = std::move(o.mMask); - mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); - mInfoInc = std::move(o.mInfoInc); - mInfoHashShift = std::move(o.mInfoHashShift); - WHash::operator=(std::move(static_cast(o))); - WKeyEqual::operator=(std::move(static_cast(o))); - DataPool::operator=(std::move(static_cast(o))); - - o.init(); - - } else { - // nothing in the other map => just clear us. - clear(); - } - } - return *this; - } - - Table(const Table& o) - : WHash(static_cast(o)) - , WKeyEqual(static_cast(o)) - , DataPool(static_cast(o)) { - ROBIN_HOOD_TRACE(this) - if (!o.empty()) { - // not empty: create an exact copy. it is also possible to just iterate through all - // elements and insert them, but copying is probably faster. - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mHashMultiplier = o.mHashMultiplier; - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - // no need for calloc because clonData does memcpy - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - } - } - - // Creates a copy of the given map. Copy constructor of each entry is used. 
- // Not sure why clang-tidy thinks this doesn't handle self assignment, it does - // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) - Table& operator=(Table const& o) { - ROBIN_HOOD_TRACE(this) - if (&o == this) { - // prevent assigning of itself - return *this; - } - - // we keep using the old allocator and not assign the new one, because we want to keep - // the memory available. when it is the same size. - if (o.empty()) { - if (0 == mMask) { - // nothing to do, we are empty too - return *this; - } - - // not empty: destroy what we have there - // clear also resets mInfo to 0, that's sometimes not necessary. - destroy(); - init(); - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - - return *this; - } - - // clean up old stuff - Destroyer::value>{}.nodes(*this); - - if (mMask != o.mMask) { - // no luck: we don't have the same array size allocated, so we need to realloc. - if (0 != mMask) { - // only deallocate if we actually have data! - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = static_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - - // no need for calloc here because cloneData performs a memcpy. - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - // sentinel is set in cloneData - } - WHash::operator=(static_cast(o)); - WKeyEqual::operator=(static_cast(o)); - DataPool::operator=(static_cast(o)); - mHashMultiplier = o.mHashMultiplier; - mNumElements = o.mNumElements; - mMask = o.mMask; - mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; - mInfoInc = o.mInfoInc; - mInfoHashShift = o.mInfoHashShift; - cloneData(o); - - return *this; - } - - // Swaps everything between the two maps. - void swap(Table& o) { - ROBIN_HOOD_TRACE(this) - using std::swap; - swap(o, *this); - } - - // Clears all data, without resizing. - void clear() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - // don't do anything! also important because we don't want to write to - // DummyInfoByte::b, even though we would just write 0 to it. - return; - } - - Destroyer::value>{}.nodes(*this); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - // clear everything, then set the sentinel again - uint8_t const z = 0; - std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // Destroys the map and all it's contents. - ~Table() { - ROBIN_HOOD_TRACE(this) - destroy(); - } - - // Checks if both tables contain the same entries. Order is irrelevant. 
- bool operator==(const Table& other) const { - ROBIN_HOOD_TRACE(this) - if (other.size() != size()) { - return false; - } - for (auto const& otherEntry : other) { - if (!has(otherEntry)) { - return false; - } - } - - return true; - } - - bool operator!=(const Table& other) const { - ROBIN_HOOD_TRACE(this) - return !operator==(other); - } - - template - typename std::enable_if::value, Q&>::type operator[](const key_type& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - typename std::enable_if::value, Q&>::type operator[](key_type&& key) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = - Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - } - - return mKeyVals[idxAndState.first].getSecond(); - } - - template - void insert(Iter first, Iter last) { - for (; first != last; ++first) { - // value_type ctor needed because this might be called with std::pair's - insert(value_type(*first)); - } - } - - void insert(std::initializer_list ilist) { - for (auto&& vt : ilist) { - insert(std::move(vt)); - } - } - - template - std::pair emplace(Args&&... args) { - ROBIN_HOOD_TRACE(this) - Node n{*this, std::forward(args)...}; - auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); - switch (idxAndState.second) { - case InsertionState::key_found: - n.destroy(*this); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = std::move(n); - break; - - case InsertionState::overflow_error: - n.destroy(*this); - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - iterator emplace_hint(const_iterator position, Args&&... args) { - (void)position; - return emplace(std::forward(args)...).first; - } - - template - std::pair try_emplace(const key_type& key, Args&&... args) { - return try_emplace_impl(key, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& key, Args&&... args) { - return try_emplace_impl(std::move(key), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { - (void)hint; - return try_emplace_impl(key, std::forward(args)...).first; - } - - template - iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { - (void)hint; - return try_emplace_impl(std::move(key), std::forward(args)...).first; - } - - template - std::pair insert_or_assign(const key_type& key, Mapped&& obj) { - return insertOrAssignImpl(key, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& key, Mapped&& obj) { - return insertOrAssignImpl(std::move(key), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(key, std::forward(obj)).first; - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { - (void)hint; - return insertOrAssignImpl(std::move(key), std::forward(obj)).first; - } - - std::pair insert(const value_type& keyval) { - ROBIN_HOOD_TRACE(this) - return emplace(keyval); - } - - iterator insert(const_iterator hint, const value_type& keyval) { - (void)hint; - return emplace(keyval).first; - } - - std::pair insert(value_type&& keyval) { - return emplace(std::move(keyval)); - } - - iterator insert(const_iterator hint, value_type&& keyval) { - (void)hint; - return emplace(std::move(keyval)).first; - } - - // Returns 1 if key is found, 0 otherwise. - size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type count(const OtherKey& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { - return 1; - } - return 0; - } - - bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - return 1U == count(key); - } - - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::type contains(const OtherKey& key) const { - return 1U == count(key); - } - - // Returns a reference to the value found for key. - // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q&>::type at(key_type const& key) { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - // Returns a reference to the value found for key. 
- // Throws std::out_of_range if element cannot be found - template - // NOLINTNEXTLINE(modernize-use-nodiscard) - typename std::enable_if::value, Q const&>::type at(key_type const& key) const { - ROBIN_HOOD_TRACE(this) - auto kv = mKeyVals + findIdx(key); - if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { - doThrow("key not found"); - } - return kv->getSecond(); - } - - const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type // NOLINT(modernize-use-nodiscard) - find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return const_iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator find(const key_type& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - template - typename std::enable_if::type find(const OtherKey& key) { - ROBIN_HOOD_TRACE(this) - const size_t idx = findIdx(key); - return iterator{mKeyVals + idx, mInfo + idx}; - } - - iterator begin() { - ROBIN_HOOD_TRACE(this) - if (empty()) { - return end(); - } - return iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - const_iterator begin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cbegin(); - } - const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - if (empty()) { - return cend(); - } - return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); - } - - iterator end() { - ROBIN_HOOD_TRACE(this) - // no need to supply valid info pointer: end() must not be dereferenced, and only node - // pointer is compared. - return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - const_iterator end() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return cend(); - } - const_iterator cend() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; - } - - iterator erase(const_iterator pos) { - ROBIN_HOOD_TRACE(this) - // its safe to perform const cast here - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); - } - - // Erases element at pos, returns iterator to the next element. - iterator erase(iterator pos) { - ROBIN_HOOD_TRACE(this) - // we assume that pos always points to a valid entry, and not end(). 
- auto const idx = static_cast(pos.mKeyVals - mKeyVals); - - shiftDown(idx); - --mNumElements; - - if (*pos.mInfo) { - // we've backward shifted, return this again - return pos; - } - - // no backward shift, return next element - return ++pos; - } - - size_t erase(const key_type& key) { - ROBIN_HOOD_TRACE(this) - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - - // check while info matches with the source idx - do { - if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - shiftDown(idx); - --mNumElements; - return 1; - } - next(&info, &idx); - } while (info <= mInfo[idx]); - - // nothing found to delete - return 0; - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // exactly the same as reserve(c). - void rehash(size_t c) { - // forces a reserve - reserve(c, true); - } - - // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. - void reserve(size_t c) { - // reserve, but don't force rehash - reserve(c, false); - } - - // If possible reallocates the map to a smaller one. This frees the underlying table. - // Does not do anything if load_factor is too large for decreasing the table's size. - void compact() { - ROBIN_HOOD_TRACE(this) - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (newSize < mMask + 1) { - rehashPowerOfTwo(newSize, true); - } - } - - size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return mNumElements; - } - - size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(-1); - } - - ROBIN_HOOD(NODISCARD) bool empty() const noexcept { - ROBIN_HOOD_TRACE(this) - return 0 == mNumElements; - } - - float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return MaxLoadFactor100 / 100.0F; - } - - // Average number of elements per bucket. Since we allow only 1 per bucket - float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this) - return static_cast(size()) / static_cast(mMask + 1); - } - - ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { - ROBIN_HOOD_TRACE(this) - return mMask; - } - - ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { - if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { - return maxElements * MaxLoadFactor100 / 100; - } - - // we might be a bit inprecise, but since maxElements is quite large that doesn't matter - return (maxElements / 100) * MaxLoadFactor100; - } - - ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { - // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load - // 64bit types. 
- return numElements + sizeof(uint64_t); - } - - ROBIN_HOOD(NODISCARD) - size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { - auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); - return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); - } - - // calculation only allowed for 2^n values - ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { -#if ROBIN_HOOD(BITNESS) == 64 - return numElements * sizeof(Node) + calcNumBytesInfo(numElements); -#else - // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. - auto const ne = static_cast(numElements); - auto const s = static_cast(sizeof(Node)); - auto const infos = static_cast(calcNumBytesInfo(numElements)); - - auto const total64 = ne * s + infos; - auto const total = static_cast(total64); - - if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { - throwOverflowError(); - } - return total; -#endif - } - -private: - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - auto it = find(e.first); - return it != end() && it->second == e.second; - } - - template - ROBIN_HOOD(NODISCARD) - typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this) - return find(e) != end(); - } - - void reserve(size_t c, bool forceRehash) { - ROBIN_HOOD_TRACE(this) - auto const minElementsAllowed = (std::max)(c, mNumElements); - auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { - newSize *= 2; - } - if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { - throwOverflowError(); - } - - ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") - - // only actually do anything when the new size is bigger than the old one. This prevents to - // continuously allocate for each reserve() call. - if (forceRehash || newSize > mMask + 1) { - rehashPowerOfTwo(newSize, false); - } - } - - // reserves space for at least the specified number of elements. - // only works if numBuckets if power of two - // True on success, false otherwise - void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { - ROBIN_HOOD_TRACE(this) - - Node* const oldKeyVals = mKeyVals; - uint8_t const* const oldInfo = mInfo; - - const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - // resize operation: move stuff - initData(numBuckets); - if (oldMaxElementsWithBuffer > 1) { - for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { - if (oldInfo[i] != 0) { - // might throw an exception, which is really bad since we are in the middle of - // moving stuff. - insert_move(std::move(oldKeyVals[i])); - // destroy the node but DON'T destroy the data. - oldKeyVals[i].~Node(); - } - } - - // this check is not necessary as it's guarded by the previous if, but it helps - // silence g++'s overeager "attempt to free a non-heap object 'map' - // [-Werror=free-nonheap-object]" warning. 
- if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - // don't destroy old data: put it into the pool instead - if (forceFree) { - std::free(oldKeyVals); - } else { - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); - } - } - } - } - - ROBIN_HOOD(NOINLINE) void throwOverflowError() const { -#if ROBIN_HOOD(HAS_EXCEPTIONS) - throw std::overflow_error("robin_hood::map overflow"); -#else - abort(); -#endif - } - - template - std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - template - std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { - ROBIN_HOOD_TRACE(this) - auto idxAndState = insertKeyPrepareEmptySpot(key); - switch (idxAndState.second) { - case InsertionState::key_found: - mKeyVals[idxAndState.first].getSecond() = std::forward(obj); - break; - - case InsertionState::new_node: - ::new (static_cast(&mKeyVals[idxAndState.first])) Node( - *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overwrite_node: - mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(obj))); - break; - - case InsertionState::overflow_error: - throwOverflowError(); - break; - } - - return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), - InsertionState::key_found != idxAndState.second); - } - - void initData(size_t max_elements) { - mNumElements = 0; - mMask = max_elements - 1; - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); - - auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); - - // malloc & zero mInfo. Faster than calloc everything. - auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); - ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" - << numElementsWithBuffer << ")") - mKeyVals = reinterpret_cast( - detail::assertNotNull(std::malloc(numBytesTotal))); - mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); - std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); - - // set sentinel - mInfo[numElementsWithBuffer] = 1; - - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; - - // Finds key, and if not already present prepares a spot where to pot the key & value. - // This potentially shifts nodes out of the way, updates mInfo and number of inserted - // elements, so the only operation left to do is create/assign a new node at that spot. 
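// [Editorial aside — a minimal sketch, not part of the patch] insertKeyPrepareEmptySpot below
// walks forward while the stored info byte is larger (nextWhileLess), checks equal-info slots
// for an existing key, and otherwise shifts the displaced tail up one slot (shiftUp) so entries
// stay ordered by probe distance. A heavily simplified free-standing version of that
// displacement idea, with no info compression, growth or overflow handling (assumes a
// power-of-two table that never fills up and uses the key itself as a toy hash):
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

void robin_hood_insert(std::vector<int64_t>& keys, std::vector<uint8_t>& dist, int64_t key) {
    const std::size_t mask = keys.size() - 1;
    std::size_t idx = static_cast<std::size_t>(key) & mask;
    uint8_t d = 1;                                                   // probe distance; 0 marks an empty slot
    while (true) {
        if (dist[idx] == 0) { keys[idx] = key; dist[idx] = d; return; }  // empty: take it
        if (dist[idx] == d && keys[idx] == key) { return; }              // already present
        if (dist[idx] < d) {                                             // "richer" entry: displace it
            std::swap(keys[idx], key);
            std::swap(dist[idx], d);
        }
        idx = (idx + 1) & mask;                                          // keep probing forward
        ++d;
    }
}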
- template - std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { - for (int i = 0; i < 256; ++i) { - size_t idx{}; - InfoType info{}; - keyToIdx(key, &idx, &info); - nextWhileLess(&info, &idx); - - // while we potentially have a match - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do NOT insert. - // see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(idx, InsertionState::key_found); - } - next(&info, &idx); - } - - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - if (!increase_size()) { - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - continue; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - if (idx != insertion_idx) { - shiftUp(idx, insertion_idx); - } - // put at empty spot - mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(insertion_idx, idx == insertion_idx - ? InsertionState::new_node - : InsertionState::overwrite_node); - } - - // enough attempts failed, so finally give up. - return std::make_pair(size_t(0), InsertionState::overflow_error); - } - - bool try_increase_info() { - ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements - << ", maxNumElementsAllowed=" - << calcMaxNumElementsAllowed(mMask + 1)) - if (mInfoInc <= 2) { - // need to be > 2 so that shift works (otherwise undefined behavior!) - return false; - } - // we got space left, try to make info smaller - mInfoInc = static_cast(mInfoInc >> 1U); - - // remove one bit of the hash, leaving more space for the distance info. - // This is extremely fast because we can operate on 8 bytes at once. - ++mInfoHashShift; - auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); - - for (size_t i = 0; i < numElementsWithBuffer; i += 8) { - auto val = unaligned_load(mInfo + i); - val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); - std::memcpy(mInfo + i, &val, sizeof(val)); - } - // update sentinel, which might have been cleared out! - mInfo[numElementsWithBuffer] = 1; - - mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - return true; - } - - // True if resize was possible, false otherwise - bool increase_size() { - // nothing allocated yet? just allocate InitialNumElements - if (0 == mMask) { - initData(InitialNumElements); - return true; - } - - auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); - if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return true; - } - - ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" - << maxNumElementsAllowed << ", load=" - << (static_cast(mNumElements) * 100.0 / - (static_cast(mMask) + 1))) - - if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - // we have to resize, even though there would still be plenty of space left! - // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case - // we have to rehash a few times - nextHashMultiplier(); - rehashPowerOfTwo(mMask + 1, true); - } else { - // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. 
- rehashPowerOfTwo((mMask + 1) * 2, false); - } - return true; - } - - void nextHashMultiplier() { - // adding an *even* number, so that the multiplier will always stay odd. This is necessary - // so that the hash stays a mixing function (and thus doesn't have any information loss). - mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); - } - - void destroy() { - if (0 == mMask) { - // don't deallocate! - return; - } - - Destroyer::value>{} - .nodesDoNotDeallocate(*this); - - // This protection against not deleting mMask shouldn't be needed as it's sufficiently - // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object 'fm' - // [-Werror=free-nonheap-object] - if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - ROBIN_HOOD_LOG("std::free") - std::free(mKeyVals); - } - } - - void init() noexcept { - mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); - mInfo = reinterpret_cast(&mMask); - mNumElements = 0; - mMask = 0; - mMaxNumElementsAllowed = 0; - mInfoInc = InitialInfoInc; - mInfoHashShift = InitialInfoHashShift; - } - - // members are sorted so no padding occurs - uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 - Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 - size_t mNumElements = 0; // 8 byte 32 - size_t mMask = 0; // 8 byte 40 - size_t mMaxNumElementsAllowed = 0; // 8 byte 48 - InfoType mInfoInc = InitialInfoInc; // 4 byte 52 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 - // 16 byte 56 if NodeAllocator -}; - -} // namespace detail - -// map - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_flat_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_node_map = detail::Table; - -template , - typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> -using unordered_map = - detail::Table) <= sizeof(size_t) * 6 && - std::is_nothrow_move_constructible>::value && - std::is_nothrow_move_assignable>::value, - MaxLoadFactor100, Key, T, Hash, KeyEqual>; - -// set - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_flat_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_node_set = detail::Table; - -template , typename KeyEqual = std::equal_to, - size_t MaxLoadFactor100 = 80> -using unordered_set = detail::Table::value && - std::is_nothrow_move_assignable::value, - MaxLoadFactor100, Key, void, Hash, KeyEqual>; - -} // namespace robin_hood - -#endif From 86bd1bca048166129a9f3e881365d939531f19b0 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 28 Feb 2023 22:44:50 -0800 Subject: [PATCH 23/49] style add parentheses --- src/bustools_correct.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 9c1c12b..bfeec77 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -367,7 +367,7 @@ void bustools_split_correct(Bustools_opt &opt) if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; @@ -574,7 +574,7 @@ void bustools_correct(Bustools_opt 
&opt) uint64_t b_corrected = (ub << (2 * bc2)) | lbc; if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; @@ -590,7 +590,7 @@ void bustools_correct(Bustools_opt &opt) uint64_t b_corrected = (ubc << (2 * bc2)) | lb; if (dump_bool) { - if (bd.barcode & len_mask != old_barcode) + if ((bd.barcode & len_mask) != old_barcode) { of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; old_barcode = bd.barcode & len_mask; From ae96146f05d7b27906c6da59a6f8c7614da53fc4 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Sun, 5 Mar 2023 12:08:55 -0800 Subject: [PATCH 24/49] try again to undo bitmap --- src/Common.cpp | 82 ++++++++++++++++++++++++------------------ src/Common.hpp | 4 +-- src/bustools_count.cpp | 6 ++-- 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 9a95e54..6e0d89f 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -73,7 +73,7 @@ std::vector intersect_vectors(const std::vector> & return std::move(u); } -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes) { if (ecs.empty()) { return -1; } @@ -86,35 +86,58 @@ int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::ve return ecs[0]; // no work } - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ecs[0]][0]))); - u = Roaring(ecmap[ecs[0]].size(), data); + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } for (size_t i = 1; i < ecs.size(); i++) { if (ecs[i] < 0 || ecs[i] >= ecmap.size()) { return -1; } - data = reinterpret_cast(const_cast(&(ecmap[ecs[i]][0]))); - u &= Roaring(ecmap[ecs[i]].size(), data); + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } auto iit = ecmapinv.find(u); if (iit == ecmapinv.end()) { // create new equivalence class int32_t ec = ecmap.size(); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); ecmapinv.insert({u,ec}); // figure out the gene list std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); return ec; } else { @@ -192,7 +215,7 @@ void intersect_genes_of_ecs(const std::vector &ecs, const std::vector< int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool 
assumeIntersectionIsEmpty) { std::vector> gu; // per gene transcript results - Roaring u; // final list of transcripts + std::vector u; // final list of transcripts std::vector glist; int32_t lastg = -2; @@ -222,9 +245,11 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec // frequent case, single gene replace with union for (auto ec : ecs) { for (const auto &t : ecmap[ec]) { - u.add(t); + u.push_back(t); } } + std::sort(u.begin(), u.end()); + u.erase(std::unique(u.begin(), u.end()), u.end()); // look up ecs based on u int32_t ec = -1; @@ -235,15 +260,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } @@ -272,13 +291,14 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } for (auto t : uu) { - u.add(t); + u.push_back(t); } } - if (u.isEmpty()) { + if (u.empty()) { return -1; } + std::sort(u.begin(), u.end()); int32_t ec = -1; auto it = ecmapinv.find(u); @@ -287,15 +307,9 @@ int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vec } else { ec = ecmapinv.size(); ecmapinv.insert({u,ec}); - uint32_t* u_arr = new uint32_t[u.cardinality()]; - u.toUint32Array(u_arr); - std::vector u_vec; - u_vec.reserve(u.cardinality()); - for (size_t i = 0; i < u.cardinality(); i++) u_vec.push_back(static_cast(u_arr[i])); - delete[] u_arr; - ecmap.push_back(u_vec); + ecmap.push_back(u); std::vector v; - vt2gene(u_vec, genemap, v); + vt2gene(u, genemap, v); ec2genes.push_back(std::move(v)); } return ec; diff --git a/src/Common.hpp b/src/Common.hpp index 4b290a3..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -185,12 +185,12 @@ struct RoaringHasher { return r; } }; -typedef u_map_ EcMapInv; +typedef u_map_, int32_t, SortedVectorHasher> EcMapInv; std::vector intersect(std::vector &u, std::vector &v); std::vector union_vectors(const std::vector> &v); std::vector intersect_vectors(const std::vector> &v); -int32_t intersect_ecs(const std::vector &ecs, Roaring &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); +int32_t intersect_ecs(const std::vector &ecs, std::vector &u, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes); void vt2gene(const std::vector &v, const std::vector &genemap, std::vector &glist); void intersect_genes_of_ecs(const std::vector &ecs, const std::vector> &ec2genes, std::vector &glist); int32_t intersect_ecs_with_genes(const std::vector &ecs, const std::vector &genemap, std::vector> &ecmap, EcMapInv &ecmapinv, std::vector> &ec2genes, bool assumeIntersectionIsEmpty = true); diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e2a63a7..e0d125f 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -32,8 +32,7 @@ void bustools_count(Bustools_opt &opt) { ecmap = std::move(h.ecs); ecmapinv.reserve(ecmap.size()); for (int32_t ec = 0; ec < ecmap.size(); ec++) { - uint32_t *data = reinterpret_cast(const_cast(&(ecmap[ec][0]))); - ecmapinv.insert({Roaring(ecmap[ec].size(), data), ec}); + ecmapinv.insert({ecmap[ec], ec}); } std::vector> ec2genes; 
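// [Editorial aside — a minimal sketch, not part of the patch] With the Roaring bitmaps undone,
// intersect_ecs in Common.cpp above intersects the sorted transcript-id vectors in place with a
// two-pointer scan (the j/k/l loop with the stated invariant). The same idea as a tiny
// stand-alone helper; intersect_in_place is an illustrative name:
#include <cstddef>
#include <cstdint>
#include <vector>

// keep only the elements of u that also occur in v; both vectors must be sorted ascending
void intersect_in_place(std::vector<int32_t>& u, const std::vector<int32_t>& v) {
    std::size_t j = 0, k = 0, l = 0;
    while (j < u.size() && l < v.size()) {
        if (u[j] < v[l]) { ++j; }
        else if (u[j] > v[l]) { ++l; }
        else { u[k++] = u[j++]; ++l; }   // match: keep it
    }
    u.resize(k);                          // u[0..k) is now the intersection
}
// Applying this pairwise over all equivalence classes of a record reproduces the core of the
// loop shown above, without any bitmap conversions.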
create_ec2genes(ecmap, genemap, ec2genes); @@ -88,7 +87,8 @@ void bustools_count(Bustools_opt &opt) { std::vector ecs; std::vector glist; ecs.reserve(100); - Roaring u; + std::vector u; + u.reserve(100); std::vector column_v; std::vector>> column_vp; // gene, {count, matrix type} if (!opt.count_collapse) { From eb58f13e59c384ea07dbd81b62f93b5262f40848 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Tue, 14 Mar 2023 17:47:49 +0000 Subject: [PATCH 25/49] adds multicore sorting --- src/bustools_sort.cpp | 507 ++++++++++++++++++++++++++++-------------- 1 file changed, 335 insertions(+), 172 deletions(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index 0f2323c..046f265 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -4,11 +4,14 @@ #include #include #include +#include #include "Common.hpp" #include "BUSData.h" #include "bustools_sort.h" +#include + #define TP std::pair //This code is for automatically creating the tmp directory supplied if it doesn't exist @@ -16,9 +19,61 @@ //#include //once filesystem is acceptable for minGW, switch to that #include "windows.h" //Needed for CreateDirectory + + void EnsureWindowsTempDirectoryExists(const Bustools_opt &opt) { + //Make sure to create the tmp directory if it doesn't exist - writing temporary files fails otherwise in Windows + //First get the directory - in theory, opt.temp_files can look like "tmp/x_" or just "x_" (or even nothing) + //so we should find the last slash and make sure that directory exists + + std::size_t ind = opt.temp_files.rfind('/'); + std::size_t ind2 = opt.temp_files.rfind('\\'); + if (ind == std::string::npos) + { + ind = ind2; + } + else if (ind2 != std::string::npos) + { + //both valid, take the largest value (representing the last slash) + ind = std::max(ind, ind2); + } + if (ind != std::string::npos) + { + auto dirName = opt.temp_files.substr(0, ind); + //When our MinGW builds support c++17, change to std::filesystem + + //std::filesystem::path filepath = dirName; + //if (!std::filesystem::is_directory(filepath)) + //{ + // std::filesystem::create_directory(filepath); + //} + CreateDirectory(dirName.c_str(), NULL); //This will do nothing if the directory exists already + } + } + + //There is a bug in Windows, where bustools sort fails. The problem is that + //gcount for some reason fails here if too much is read and returns 0, even though + //it succeeds. Could perhaps be a 32 bit issue somewhere, does size_t become 32 bits? + //Anyway, this is a workaround that fixes the issue - does the same as the flag -m 100000000. + //An interesting observation is that opt.max_memory is set to 1 << 32, which will become exactly + //zero if truncated to 32 bits... + size_t WindowsMaxMemory(size_t mem) { + + const size_t win_mem_max = 1e8; + if (mem > win_mem_max) { + mem = win_mem_max; + } + return mem; + } +#else + void EnsureWindowsTempDirectoryExists(const Bustools_opt &opt) {} + size_t WindowsMaxMemory(size_t mem) {return mem;} + #endif + + + inline bool cmp1(const BUSData &a, const BUSData &b) { if (a.barcode == b.barcode) @@ -328,22 +383,119 @@ inline bool ncmp5(const TP &a, const TP &b) } }; + +void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const BUSData &, const BUSData &)) { + //std::sort(busdata, busdata + N, cmp); + if (t > 1 && N > 100000) { + const size_t s = 256; + std::vector samples, pivots; + + // samples = drawn from 0, s, 2s, ... 
, t*s + samples.reserve(s*t); + for (int i = 0; i < s*t; ++i) { + samples.push_back(busdata[i * (N / (s*t))]); + } + std::sort(samples.begin(), samples.end(), cmp); + + pivots.reserve(t-1); + // piviots are samples s, 2s, ... , (t-1)*s + for (int i = 1; i < t; ++i) { + pivots.push_back(samples[i * s]); + + //std::cerr << "pivot " << i << " = " << binaryToString(pivots[i-1].barcode, 16) << std::endl; + } + + // buckets are locations of pivots after partitioning + // partition i is between buckets[i] and buckets[i+1] + std::vector buckets(t+1, 0); + buckets[0] = 0; + buckets[t] = N; + + + double partition_time = 0; + clock_t start, end; + start = clock(); + for (int i = 0; i < t-1; i++) { + BUSData p = pivots[i]; + //std::cerr << "partitioning around " << binaryToString(p.barcode, 16) << std::endl; + auto mid = std::partition(busdata + buckets[i], busdata + N, [&p, &cmp](const BUSData &a) { return cmp(a, p); }) - busdata; + buckets[i+1] = mid; + //std::cerr << "bucket " << i << " has " << buckets[i+1] - buckets[i] << " elements, mid = " << mid << std::endl; + } + //std::cerr << "bucket " << t-1 << " has " << buckets[t] - buckets[t-1] << " elements" << std::endl; + + // verify that the pivots are sorted + for (int i = 0; i < t-2; i++) { + if (!cmp(pivots[i], pivots[i+1])) { + std::cerr << "pivot " << i << " is not smaller than pivot " << i+1 << std::endl; + exit(1); + } + } + + //verify that each partition is smaller than the pivot + for (int i = 0; i < t; i++) { + for (size_t j = buckets[i]; j < buckets[i+1]; j++) { + if (i < t-1 && !cmp(busdata[j], pivots[i])) { + std::cerr << "partition " << i << " has an element larger than the pivot" << std::endl; + std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; + std::cerr << "pivot " << i << " = " << binaryToString(pivots[i].barcode, 16) << std::endl; + + exit(1); + } + if (i > 0 && cmp(busdata[j], pivots[i-1])) { + std::cerr << "partition " << i << " has an element smaller than the next pivot" << std::endl; + std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; + std::cerr << "pivot " << i+1 << " = " << binaryToString(pivots[i+1].barcode, 16) << std::endl; + exit(1); + } + } + } + + // partition the busdata based on the middle pivot + /* + std::function mid_partition = [&](int i, int j) { + if (j-i <= 1) { + return; + } + size_t k = (j-i)/2; + BUSData p = pivots[k-1]; + buckets[j] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }); + mid_partition(i, k); + mid_partition(k, j); + }; + + mid_partition(0, t); + */ + end = clock(); + partition_time += ((double) (end - start)) / CLOCKS_PER_SEC; + std::cerr << "partition time: " << partition_time << "s" << std::endl; + + + + // sort each bucket + std::vector workers; + for (int i = 0; i < t; ++i) { + workers.push_back(std::thread([&busdata, &buckets, &cmp, i]() { + //std::cerr << "sorting bucket " << i << " with " << buckets[i] << " to " << buckets[i+1]<< std::endl; + std::sort(busdata + buckets[i], busdata + buckets[i+1], cmp); + })); + } + + for (auto &w : workers) { + w.join(); + } + + + } else { + std::sort(busdata, busdata + N, cmp); + } + +} + void bustools_sort(const Bustools_opt &opt) { - auto mem = opt.max_memory; - //There is a bug in Windows, where bustools sort fails. The problem is that - //gcount for some reason fails here if too much is read and returns 0, even though - //it succeeds. 
Could perhaps be a 32 bit issue somewhere, does size_t become 32 bits? - //Anyway, this is a workaround that fixes the issue - does the same as the flag -m 100000000. - //An interesting observation is that opt.max_memory is set to 1 << 32, which will become exactly - //zero if truncated to 32 bits... -#if defined(__MINGW32__) || defined(_MSC_VER) - const size_t win_mem_max = 1e8; - if (mem > win_mem_max) - { - mem = win_mem_max; - } -#endif + auto mem = WindowsMaxMemory(opt.max_memory); + BUSHeader h; size_t N = mem / sizeof(BUSData); BUSData *p = new BUSData[N]; @@ -381,49 +533,60 @@ void bustools_sort(const Bustools_opt &opt) exit(1); } -#if defined(__MINGW32__) || defined(_MSC_VER) - //Make sure to create the tmp directory if it doesn't exist - writing temporary files fails otherwise in Windows - //First get the directory - in theory, opt.temp_files can look like "tmp/x_" or just "x_" (or even nothing) - //so we should find the last slash and make sure that directory exists - std::size_t ind = opt.temp_files.rfind('/'); - std::size_t ind2 = opt.temp_files.rfind('\\'); - if (ind == std::string::npos) - { - ind = ind2; - } - else if (ind2 != std::string::npos) - { - //both valid, take the largest value (representing the last slash) - ind = std::max(ind, ind2); - } - if (ind != std::string::npos) - { - auto dirName = opt.temp_files.substr(0, ind); - //When our MinGW builds support c++17, change to std::filesystem - - //std::filesystem::path filepath = dirName; - //if (!std::filesystem::is_directory(filepath)) - //{ - // std::filesystem::create_directory(filepath); - //} - CreateDirectory(dirName.c_str(), NULL); //This will do nothing if the directory exists already - } -#endif + - size_t sc = 0; + size_t sc = 0; // number of records read + double sorting_time = 0; int tmp_file_no = 0; - for (const auto &infn : opt.files) - { + + // only use a single buffer if we are reading from stdin or if we have a single file + bool all_in_buffer = opt.stream_in || opt.files.size() == 1; + + + const auto collapse_and_write = [&](BUSData *p, size_t rc, std::ostream &outf) { + for (size_t i = 0; i < rc;) { + size_t j = i + 1; + uint32_t c = p[i].count; + auto ec = p[i].ec; + for (; j < rc; j++) { + if (p[i].barcode == p[j].barcode && p[i].UMI == p[j].UMI && p[i].ec == p[j].ec && p[i].flags == p[j].flags && p[i].pad == p[j].pad) { + c += p[j].count; + } else { + break; + } + } + // merge identical things + p[i].count = c; + outf.write((char *)(&(p[i])), sizeof(p[i])); + // increment + i = j; + } + }; + + // open the correct output stream + std::ofstream of; + std::streambuf *buf = nullptr; + if (!opt.stream_out) { + of.open(opt.output, std::ios::out | std::ios::binary); + buf = of.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream busf_out(buf); + + // measure time spent reading input + clock_t start,end; + double reading_time = 0; + double writing_time = 0; + + for (const auto &infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; - if (!opt.stream_in) - { + if (!opt.stream_in) { inf.open(infn.c_str(), std::ios::binary); inbuf = inf.rdbuf(); - } - else - { + } else { inbuf = std::cin.rdbuf(); } std::istream in(inbuf); @@ -432,163 +595,163 @@ void bustools_sort(const Bustools_opt &opt) int rc = 1; - while (in.good()) - { - // read as much as we can + + while (in.good()) { + + start = clock(); in.read((char *)p, N * sizeof(BUSData)); size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) - { + end = clock(); + reading_time += ((double) (end - start)) / CLOCKS_PER_SEC; + + // no 
records read, we are done + if (rc == 0) { break; } + + // records did not fit in buffer + if (rc >= N) { + all_in_buffer = false; + } + // now sort the data - std::sort(p, p + rc, cmp); - sc += rc; + start = clock(); + //std::sort(p, p + rc, cmp); + sort_bus_array(p, rc, opt.threads, cmp); + end = clock(); + sorting_time += ((double) (end - start)) / CLOCKS_PER_SEC; - // write the output - std::ofstream outf(opt.temp_files + std::to_string(tmp_file_no), std::ios::binary); - writeHeader(outf, h); + sc += rc; - for (size_t i = 0; i < rc;) - { - size_t j = i + 1; - uint32_t c = p[i].count; - auto ec = p[i].ec; - for (; j < rc; j++) - { - if (p[i].barcode == p[j].barcode && p[i].UMI == p[j].UMI && p[i].ec == p[j].ec && p[i].flags == p[j].flags && p[i].pad == p[j].pad) - { - c += p[j].count; - } - else - { - break; - } - } - // merge identical things - p[i].count = c; - outf.write((char *)(&(p[i])), sizeof(p[i])); - // increment - i = j; + if (all_in_buffer) { + std::cerr << " all fits in buffer" << std::endl; + // single file or stream, all data fits in buffer, write directly to output + start = clock(); + writeHeader(busf_out, h); + collapse_and_write(p, rc, busf_out); + end = clock(); + writing_time = ((double) (end - start)) / CLOCKS_PER_SEC; + } else { + // need to sort in chunks + // write the output + std::ofstream outf(opt.temp_files + std::to_string(tmp_file_no), std::ios::binary); + writeHeader(outf, h); + + collapse_and_write(p, rc, outf); + + outf.close(); + tmp_file_no++; } - - outf.close(); - tmp_file_no++; + } } delete[] p; p = nullptr; std::cerr << "Read in " << sc << " BUS records" << std::endl; + + std::cerr << "reading time " << reading_time << "s" << std::endl; + std::cerr << "sorting time " << sorting_time << "s" << std::endl; + std::cerr << "writing time " << writing_time << "s" << std::endl; - std::streambuf *buf = nullptr; - std::ofstream of; - if (!opt.stream_out) - { - of.open(opt.output, std::ios::out | std::ios::binary); - buf = of.rdbuf(); - } - else - { - buf = std::cout.rdbuf(); - } - std::ostream busf_out(buf); - writeHeader(busf_out, h); + + if (!all_in_buffer) { + writeHeader(busf_out, h); - // todo: skip writing to disk if it fits in memory - if (tmp_file_no == 1) - { - size_t M = N / 8; - p = new BUSData[M]; - std::ifstream in(opt.temp_files + "0", std::ios::binary); - BUSHeader tmp; - parseHeader(in, tmp); - while (in.good()) + if (tmp_file_no == 1) { - // read as much as we can - in.read((char *)p, M * sizeof(BUSData)); - size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) + size_t M = N / 8; + p = new BUSData[M]; + std::ifstream in(opt.temp_files + "0", std::ios::binary); + BUSHeader tmp; + parseHeader(in, tmp); + while (in.good()) { - break; + // read as much as we can + in.read((char *)p, M * sizeof(BUSData)); + size_t rc = in.gcount() / sizeof(BUSData); + if (rc == 0) + { + break; + } + busf_out.write((char *)p, rc * sizeof(BUSData)); } - busf_out.write((char *)p, rc * sizeof(BUSData)); + in.close(); + std::remove((opt.temp_files + "0").c_str()); } - in.close(); - std::remove((opt.temp_files + "0").c_str()); - } - else - { - // TODO: test if replacing with k-way merge is better - // adapted from https://github.com/arq5x/kway-mergesort/blob/master/kwaymergesort.h - int k = tmp_file_no; - size_t M = N / (k); - //std::memset(p, 0, N*sizeof(BUSData)); - std::vector bf(k); - for (int i = 0; i < k; i++) - { - bf[i].open((opt.temp_files + std::to_string(i)).c_str(), std::ios::binary); - BUSHeader tmp; - parseHeader(bf[i], tmp); - } - - 
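// ---------------------------------------------------------------------------
// Aside (not part of the patch): the block that follows merges the k sorted
// temporary files with a priority queue. The same k-way merge on in-memory
// sorted runs, as a standalone sketch (function and type names are
// illustrative; the real code reads BUSData from file streams and collapses
// duplicates as they come off the queue):
#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

std::vector<uint64_t> kway_merge(const std::vector<std::vector<uint64_t>> &runs) {
  using TP = std::pair<uint64_t, size_t>; // (value, index of the run it came from)
  auto gt = [](const TP &a, const TP &b) { return a.first > b.first; };
  std::priority_queue<TP, std::vector<TP>, decltype(gt)> pq(gt); // min-heap

  std::vector<size_t> pos(runs.size(), 0); // next unread position in each run
  for (size_t i = 0; i < runs.size(); ++i) {
    if (!runs[i].empty()) {
      pq.push({runs[i][0], i});
      pos[i] = 1;
    }
  }
  std::vector<uint64_t> out;
  while (!pq.empty()) {
    TP min = pq.top();
    pq.pop();
    out.push_back(min.first);      // emit the globally smallest element
    const size_t i = min.second;
    if (pos[i] < runs[i].size()) { // refill from the run we just consumed
      pq.push({runs[i][pos[i]], i});
      ++pos[i];
    }
  }
  return out;
}
// --- end of aside; the patch text continues below ---------------------------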
std::priority_queue, std::function> pq(ncmp); - BUSData t; - for (int i = 0; i < k; i++) - { - bf[i].read((char *)&t, sizeof(t)); - pq.push({t, i}); - } - - BUSData curr = pq.top().first; - curr.count = 0; // we'll count this again in the first loop - while (!pq.empty()) + else { - TP min = pq.top(); + // TODO: test if replacing with k-way merge is better + // adapted from https://github.com/arq5x/kway-mergesort/blob/master/kwaymergesort.h + int k = tmp_file_no; + size_t M = N / (k); + //std::memset(p, 0, N*sizeof(BUSData)); + std::vector bf(k); + for (int i = 0; i < k; i++) + { + bf[i].open((opt.temp_files + std::to_string(i)).c_str(), std::ios::binary); + BUSHeader tmp; + parseHeader(bf[i], tmp); + } - pq.pop(); - // process the data - BUSData &m = min.first; - int i = min.second; - if (m.barcode == curr.barcode && m.UMI == curr.UMI && m.ec == curr.ec && m.flags == curr.flags && m.pad == curr.pad) + std::priority_queue, std::function> pq(ncmp); + BUSData t; + for (int i = 0; i < k; i++) { - // same data, increase count - curr.count += m.count; + bf[i].read((char *)&t, sizeof(t)); + pq.push({t, i}); } - else + + BUSData curr = pq.top().first; + curr.count = 0; // we'll count this again in the first loop + while (!pq.empty()) { + TP min = pq.top(); - // new data let's output curr, new curr is m - if (curr.count != 0) + pq.pop(); + // process the data + BUSData &m = min.first; + int i = min.second; + if (m.barcode == curr.barcode && m.UMI == curr.UMI && m.ec == curr.ec && m.flags == curr.flags && m.pad == curr.pad) { - busf_out.write((char *)&curr, sizeof(curr)); + // same data, increase count + curr.count += m.count; } - curr = m; - } - // read next from stream - if (bf[i].good()) - { - bf[i].read((char *)&t, sizeof(t)); - if (bf[i].gcount() > 0) + else + { + + // new data let's output curr, new curr is m + if (curr.count != 0) + { + busf_out.write((char *)&curr, sizeof(curr)); + } + curr = m; + } + // read next from stream + if (bf[i].good()) { - pq.push({t, i}); + bf[i].read((char *)&t, sizeof(t)); + if (bf[i].gcount() > 0) + { + pq.push({t, i}); + } } } - } - if (curr.count > 0) - { - // write out remaining straggler - busf_out.write((char *)&curr, sizeof(curr)); - } + if (curr.count > 0) + { + // write out remaining straggler + busf_out.write((char *)&curr, sizeof(curr)); + } - // remove intermediary files - for (int i = 0; i < k; i++) - { - bf[i].close(); - std::remove((opt.temp_files + std::to_string(i)).c_str()); + // remove intermediary files + for (int i = 0; i < k; i++) + { + bf[i].close(); + std::remove((opt.temp_files + std::to_string(i)).c_str()); + } } } From b762a52d2504b7ed1496f19515de7578fa132d86 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Tue, 14 Mar 2023 22:14:46 +0000 Subject: [PATCH 26/49] better partition function --- src/bustools_sort.cpp | 47 +++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index 046f265..ae79e66 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -415,6 +415,7 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B double partition_time = 0; clock_t start, end; start = clock(); + /* for (int i = 0; i < t-1; i++) { BUSData p = pivots[i]; //std::cerr << "partitioning around " << binaryToString(p.barcode, 16) << std::endl; @@ -423,7 +424,28 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B //std::cerr << "bucket " << i << " has " << buckets[i+1] - buckets[i] << " 
elements, mid = " << mid << std::endl; } //std::cerr << "bucket " << t-1 << " has " << buckets[t] - buckets[t-1] << " elements" << std::endl; + */ + + // partition the busdata based on the middle pivot + std::function mid_partition = [&](int i, int j) { + if (j-i <= 1) { + return; + } + size_t k = i + (j-i)/2; + //std::cerr << "partitioning " << i << " to " << j << " with middle " << k << std::endl; + //std::cerr << "buckets i and j are " << buckets[i] << " and " << buckets[j] << std::endl; + BUSData p = pivots[k-1]; + //std::cerr << "pivot element is " << binaryToString(p.barcode, 16) << std::endl; + buckets[k] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }) - busdata; + //std::cerr << "bucket " << k << " is " << buckets[k] << std::endl; + mid_partition(i, k); + mid_partition(k, j); + }; + mid_partition(0, t); + + + /* // verify that the pivots are sorted for (int i = 0; i < t-2; i++) { if (!cmp(pivots[i], pivots[i+1])) { @@ -431,7 +453,11 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B exit(1); } } - + + for (int i = 0; i < t; i++) { + std::cerr << "bucket " << i << " at " << buckets[i] << " has " << buckets[i+1] - buckets[i] << " elements" << std::endl; + } + //verify that each partition is smaller than the pivot for (int i = 0; i < t; i++) { for (size_t j = buckets[i]; j < buckets[i+1]; j++) { @@ -445,27 +471,14 @@ void sort_bus_array(BUSData* busdata, size_t N, const int t, bool (*cmp)(const B if (i > 0 && cmp(busdata[j], pivots[i-1])) { std::cerr << "partition " << i << " has an element smaller than the next pivot" << std::endl; std::cerr << "element " << j << " = " << binaryToString(busdata[j].barcode, 16) << std::endl; - std::cerr << "pivot " << i+1 << " = " << binaryToString(pivots[i+1].barcode, 16) << std::endl; + std::cerr << "pivot " << i-1 << " = " << binaryToString(pivots[i-1].barcode, 16) << std::endl; exit(1); } } } - - // partition the busdata based on the middle pivot - /* - std::function mid_partition = [&](int i, int j) { - if (j-i <= 1) { - return; - } - size_t k = (j-i)/2; - BUSData p = pivots[k-1]; - buckets[j] = std::partition(busdata + buckets[i], busdata + buckets[j], [&p, &cmp](const BUSData &a) { return cmp(a, p); }); - mid_partition(i, k); - mid_partition(k, j); - }; - - mid_partition(0, t); */ + + end = clock(); partition_time += ((double) (end - start)) / CLOCKS_PER_SEC; std::cerr << "partition time: " << partition_time << "s" << std::endl; From 679f84b0cf3509ddc9dd3bd5b6ce13846dbd99ec Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 15 Mar 2023 03:10:21 -0700 Subject: [PATCH 27/49] Add priority option --- src/bustools_main.cpp | 123 ++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 66 deletions(-) diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 991c099..24c8814 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -335,6 +335,8 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) int cm_flag = 0; int hist_flag = 0; int rawcounts_flag = 0; + int priority_one = 0; + int priority_two = 0; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"genemap", required_argument, 0, 'g'}, @@ -349,6 +351,8 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, {"split", required_argument, 0, 's'}, + {"priority-1", no_argument, &priority_one, 1}, + 
{"priority-2", no_argument, &priority_two, 1}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -383,20 +387,16 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) break; } } - if (gene_flag) - { + if (gene_flag) { opt.count_collapse = true; } - if (umigene_flag) - { + if (umigene_flag) { opt.umi_gene_collapse = true; } - if (em_flag) - { + if (em_flag) { opt.count_em = true; } - if (cm_flag) - { + if (cm_flag) { opt.count_cm = true; } if (hist_flag) { @@ -405,6 +405,15 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) if (rawcounts_flag) { opt.count_raw_counts = true; } + if (priority_one) { + opt.count_mtx_priority = 1; + } + if (priority_two) { + opt.count_mtx_priority = 2; + } + if (priority_one && priority_two) { + opt.count_mtx_priority = -1; // Can't supply both, raise an error later + } while (optind < argc) opt.files.push_back(argv[optind++]); @@ -1636,95 +1645,75 @@ bool check_ProgramOptions_count(Bustools_opt &opt) bool ret = true; // check for output directory - if (opt.output.empty()) - { + if (opt.output.empty()) { std::cerr << "Error: Missing output directory" << std::endl; ret = false; } - else - { + else { bool isDir = false; - if (checkDirectoryExists(opt.output)) - { + if (checkDirectoryExists(opt.output)) { isDir = true; } - else - { - if (opt.output.at(opt.output.size() - 1) == '/') - { - if (my_mkdir(opt.output.c_str(), 0777) == -1) - { + else { + if (opt.output.at(opt.output.size() - 1) == '/') { + if (my_mkdir(opt.output.c_str(), 0777) == -1) { std::cerr << "Error: could not create directory " << opt.output << std::endl; ret = false; } - else - { + else { isDir = true; } } } - if (isDir) - { + if (isDir) { opt.output += "output"; } } - if (opt.count_em && opt.count_gene_multimapping) - { + if (opt.count_em && opt.count_gene_multimapping) { std::cerr << "Error: EM algorithm and counting multimapping reads are incompatible" << std::endl; ret = false; } - if (opt.count_em && opt.count_cm) - { + if (opt.count_em && opt.count_cm) { std::cerr << "Error: EM algorithm and counting multiplicites are incompatible" << std::endl; ret = false; } - if (opt.umi_gene_collapse && opt.count_cm) - { + if (opt.umi_gene_collapse && opt.count_cm) { std::cerr << "Error: Gene-level collapsing of UMIs and counting multiplicites are incompatible" << std::endl; ret = false; } - if (opt.umi_gene_collapse && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) - { + if (opt.umi_gene_collapse && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) { std::cerr << "Error: Gene-level collapsing of UMIs is currently incompatible with --hist, --downsample, or --rawcounts" << std::endl; ret = false; } - if (opt.count_cm && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) - { + if (opt.count_cm && (opt.count_raw_counts || opt.count_gen_hist || opt.count_downsampling_factor != 1.0)) { std::cerr << "Error: Counting multiplicites is incompatible with --hist, --downsample, or --rawcounts" << std::endl; ret = false; } - if (opt.count_raw_counts && opt.count_em) - { + if (opt.count_raw_counts && opt.count_em) { std::cerr << "Error: Counting raw counts are not supported for the EM algorithm" << std::endl; ret = false; } - if (opt.count_raw_counts && !opt.count_collapse) - { + if (opt.count_raw_counts && !opt.count_collapse) { std::cerr << "Error: Raw counts are currently only supported for gene counting, not ec counting." 
<< std::endl; ret = false; } - if (opt.files.size() == 0) - { + if (opt.files.size() == 0) { std::cerr << "Error: Missing BUS input files" << std::endl; ret = false; } - else - { - if (!opt.stream_in) - { - for (const auto &it : opt.files) - { - if (!checkFileExists(it)) - { + else { + if (!opt.stream_in) { + for (const auto &it : opt.files) { + if (!checkFileExists(it)) { std::cerr << "Error: File not found, " << it << std::endl; ret = false; } @@ -1732,13 +1721,11 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_genes.size() == 0) - { + if (opt.count_genes.size() == 0) { std::cerr << "Error: missing gene mapping file" << std::endl; ret = false; } - else - { + else { if (!checkFileExists(opt.count_genes)) { std::cerr << "Error: File not found " << opt.count_genes << std::endl; @@ -1746,13 +1733,11 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_ecs.size() == 0) - { + if (opt.count_ecs.size() == 0) { std::cerr << "Error: missing equivalence class mapping file" << std::endl; ret = false; } - else - { + else { if (!checkFileExists(opt.count_ecs)) { std::cerr << "Error: File not found " << opt.count_ecs << std::endl; @@ -1760,24 +1745,19 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_txp.size() == 0) - { + if (opt.count_txp.size() == 0) { std::cerr << "Error: missing transcript name file" << std::endl; ret = false; } - else - { - if (!checkFileExists(opt.count_txp)) - { + else { + if (!checkFileExists(opt.count_txp)) { std::cerr << "Error: File not found " << opt.count_txp << std::endl; ret = false; } } - if (opt.count_split.size() != 0) - { - if (!checkFileExists(opt.count_split)) - { + if (opt.count_split.size() != 0) { + if (!checkFileExists(opt.count_split)) { std::cerr << "Error: File not found " << opt.count_split << std::endl; ret = false; } @@ -1787,6 +1767,15 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } + if (opt.count_mtx_priority == -1) { + std::cerr << "Error: Cannot specify multiply options for priority " << std::endl; + ret = false; + } + if (opt.count_mtx_priority > 0 && opt.count_split.size() == 0) { + std::cerr << "Error: Cannot use priority unless -s is specified " << std::endl; + ret = false; + } + return ret; } @@ -2675,6 +2664,8 @@ void Bustools_count_Usage() << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl + << " --priority-1 For --split, prioritize first matrix in split matrix for UMIs that multimap to both splits" << std::endl + << " --priority-2 For --split, prioritize second matrix in split matrix for UMIs that multimap to both splits" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 2aea2ee757ba6b13e8d6732d0342c77b744ac367 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 15 Mar 2023 03:12:26 -0700 Subject: [PATCH 28/49] updated common for count_mtx_priority --- src/Common.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..89d8b2d 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -72,6 +72,7 @@ struct Bustools_opt std::string count_ecs; std::string count_txp; std::string count_split; + int 
count_mtx_priority = 0; bool count_em = false; bool count_cm = false; bool count_collapse = false; From 3f287d515332192146508008211f5e25dfc9d7f9 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Wed, 15 Mar 2023 12:16:23 +0000 Subject: [PATCH 29/49] batches writes --- src/bustools_sort.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/bustools_sort.cpp b/src/bustools_sort.cpp index ae79e66..adc5e7e 100644 --- a/src/bustools_sort.cpp +++ b/src/bustools_sort.cpp @@ -558,6 +558,10 @@ void bustools_sort(const Bustools_opt &opt) const auto collapse_and_write = [&](BUSData *p, size_t rc, std::ostream &outf) { + size_t batch = 1<<20; + std::vector v; + v.reserve(batch); + for (size_t i = 0; i < rc;) { size_t j = i + 1; uint32_t c = p[i].count; @@ -571,10 +575,23 @@ void bustools_sort(const Bustools_opt &opt) } // merge identical things p[i].count = c; - outf.write((char *)(&(p[i])), sizeof(p[i])); + + // push back p to the vector + v.push_back(p[i]); + + if (v.size() >= batch) { + outf.write((char *)v.data(), v.size() * sizeof(BUSData)); + v.clear(); + } + + //outf.write((char *)(&(p[i])), sizeof(p[i])); // increment i = j; } + if (v.size() > 0) { + outf.write((char *)v.data(), v.size() * sizeof(BUSData)); + v.clear(); + } }; // open the correct output stream From a7af47ad77c29e0d814bbe4129ee417c4f749e3f Mon Sep 17 00:00:00 2001 From: Yenaled Date: Fri, 17 Mar 2023 19:24:18 +0000 Subject: [PATCH 30/49] Priority rules for mtx types --- src/Common.cpp | 54 ++++++++++++++++++++++++++++++++++++++---- src/Common.hpp | 1 - src/bustools_count.cpp | 43 +++++++++++++++++++++++++++------ src/bustools_main.cpp | 24 ------------------- 4 files changed, 86 insertions(+), 36 deletions(-) diff --git a/src/Common.cpp b/src/Common.cpp index 6e0d89f..58c169c 100644 --- a/src/Common.cpp +++ b/src/Common.cpp @@ -335,11 +335,57 @@ COUNT_MTX_TYPE intersect_ecs_with_subset_txs(int32_t ec, const std::vector& ecs, const std::vector> &ecmap, const std::vector& tx_split) { + // Note: tx_split indices are tx ids and values are 1 (exists in split) or 0 (does not exist in split) if (tx_split.size() == 0) return COUNT_DEFAULT; if (ecs.size() == 0) return COUNT_AMBIGUOUS; // Shouldn't happen + std::vector u; + u.resize(0); + auto &v = ecmap[ecs[0]]; // copy + for (size_t i = 0; i< v.size(); i++) { + u.push_back(v[i]); + } + for (size_t i = 1; i < ecs.size(); i++) { + const auto &v = ecmap[ecs[i]]; + + int j = 0; + int k = 0; + int l = 0; + int n = u.size(); + int m = v.size(); + // u and v are sorted, j,k,l = 0 + while (j < n && l < m) { + // invariant: u[:k] is the intersection of u[:j] and v[:l], j <= n, l <= m + // u[:j] <= u[j:], v[:l] <= v[l:], u[j:] is sorted, v[l:] is sorted, u[:k] is sorted + if (u[j] < v[l]) { + j++; + } else if (u[j] > v[l]) { + l++; + } else { + // match + if (k < j) { + std::swap(u[k], u[j]); + } + k++; + j++; + l++; + } + } + if (k < n) { + u.resize(k); + } + } size_t n_1 = 0; size_t n_2 = 0; - for (auto ec : ecs) { // We still need to optimize this + for (auto t : u) { + if(tx_split[t]) { + n_2++; + } else { + n_1++; + } + if (n_1 > 0 && n_2 > 0) break; // Stop searching + } + return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? 
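// ---------------------------------------------------------------------------
// Aside (not part of the patch): the loop above keeps u as a running
// intersection of sorted transcript lists, using the classic two-pointer scan
// instead of repeated std::find. The same idea on two sorted vectors, as a
// standalone sketch (std::set_intersection is the library equivalent):
#include <cstdint>
#include <vector>

std::vector<int32_t> intersect_sorted(const std::vector<int32_t> &u,
                                      const std::vector<int32_t> &v) {
  std::vector<int32_t> out;
  size_t j = 0, l = 0;
  while (j < u.size() && l < v.size()) {
    if (u[j] < v[l]) {
      ++j;                 // u[j] cannot be in v, advance u
    } else if (u[j] > v[l]) {
      ++l;                 // v[l] cannot be in u, advance v
    } else {
      out.push_back(u[j]); // present in both
      ++j;
      ++l;
    }
  }
  return out;              // runs in O(|u| + |v|)
}
// --- end of aside; the patch text continues below ---------------------------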
COUNT_DEFAULT : COUNT_SPLIT)); + /*for (auto ec : ecs) { // We still need to optimize this for (auto t: ecmap[ec]) { if(std::find(tx_split.begin(), tx_split.end(), t) != tx_split.end()) { n_2++; @@ -349,8 +395,8 @@ COUNT_MTX_TYPE intersect_ecs_with_subset_txs(const std::vector& ecs, co if (n_1 > 0 && n_2 > 0) break; // Stop searching } if (n_1 > 0 && n_2 > 0) break; // Stop searching - } - return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); + }*/ + //return (n_1 > 0 && n_2 > 0 ? COUNT_AMBIGUOUS : (n_1 > 0 ? COUNT_DEFAULT : COUNT_SPLIT)); } @@ -359,4 +405,4 @@ void copy_file(std::string src, std::string dest) { std::ofstream idest(dest, std::ios::binary); idest << isrc.rdbuf(); -} \ No newline at end of file +} diff --git a/src/Common.hpp b/src/Common.hpp index 89d8b2d..6a50ebd 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -72,7 +72,6 @@ struct Bustools_opt std::string count_ecs; std::string count_txp; std::string count_split; - int count_mtx_priority = 0; bool count_em = false; bool count_cm = false; bool count_collapse = false; diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index e0d125f..f155669 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -23,7 +23,9 @@ void bustools_count(Bustools_opt &opt) { u_map_ txnames; auto txnames_split = txnames; // copy - std::vector tx_split; + std::vector tx_split; // Store transcript names for split + std::vector tx_split_lookup; // Map transcript IDs to mtx status + int count_mtx_priority = !opt.count_gene_multimapping ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -34,7 +36,7 @@ void bustools_count(Bustools_opt &opt) { for (int32_t ec = 0; ec < ecmap.size(); ec++) { ecmapinv.insert({ecmap[ec], ec}); } - std::vector> ec2genes; + std::vector> ec2genes, ec2genes_priority; create_ec2genes(ecmap, genemap, ec2genes); @@ -64,6 +66,7 @@ void bustools_count(Bustools_opt &opt) { if (count_split) { parseTranscripts(opt.count_split, txnames_split); // subset of txnames tx_split.reserve(txnames_split.size()); + tx_split_lookup.resize(txnames.size(), -1); for (auto x : txnames_split) { if (txnames.count(x.first)) tx_split.push_back(txnames[x.first]); } @@ -71,6 +74,25 @@ void bustools_count(Bustools_opt &opt) { of_A.open(mtx_ofn_split_A); of_2 << ssHeader.str(); of_A << ssHeader.str(); + auto ecmap_ = ecmap; // copy + for (int ec = 0; ec < ecmap.size(); ec++) { // Get new ecmap based on split + for (auto tx : ecmap[ec]) { + auto &new_ec = ecmap_[ec]; + bool found = std::find(tx_split.begin(), tx_split.end(), tx) != tx_split.end(); + tx_split_lookup[tx] = found; + // Remove transcripts depending on whether they're found in tx_split + // Note: It is possible for one of the new ECs to be empty, in which case intersect_genes_of_ecs will result in the empty set for glist + // Essentially, we are removing all tx's that belong to (or not belong to) tx_split in the equivalence classes + // This handles instances in which a read maps to exon of one gene but intron of another (likely overlapping) gene to avoid discarding the record + // This is done at read-level (not UMI-level) so if one UMI maps to one gene's exon but another UMI maps to the other gene's intron, we still discard it + if (count_mtx_priority == 1 && !found) + 
new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end()); + else if (count_mtx_priority == 2 && found) + new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end()); + } + } + if (count_mtx_priority != 0) + create_ec2genes(ecmap_, genemap, ec2genes_priority); // Note: Some ECs may not be associated with any genes (i.e. empty vector) } of.open(mtx_ofn); of << ssHeader.str(); @@ -151,6 +173,7 @@ void bustools_count(Bustools_opt &opt) { if (opt.umi_gene_collapse) { intersect_genes_of_ecs(ecs,ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); } if (opt.umi_gene_collapse && glist.size() == 0) { // Gene-intersection zero, check for UMI collision @@ -160,6 +183,7 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -233,7 +257,7 @@ void bustools_count(Bustools_opt &opt) { } } double val = j-i; - auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(column_v[i], ecmap, tx_split_lookup); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? n_entries_2 : n_entries_A); of_ << n_rows << " " << (column_v[i]+1) << " " << val << "\n"; @@ -275,6 +299,7 @@ void bustools_count(Bustools_opt &opt) { } intersect_genes_of_ecs(ecs,ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (opt.count_downsampling_factor != 1.0) { uint32_t newCounts = 0; @@ -289,7 +314,7 @@ void bustools_count(Bustools_opt &opt) { } } if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {(opt.count_raw_counts ? 
counts : 1.0)/gn, which_mtx}}); @@ -342,6 +367,7 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -351,10 +377,11 @@ void bustools_count(Bustools_opt &opt) { } if (glist.size() == 0) { intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); } gn = glist.size(); if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs_within_molecule, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {1.0/gn, which_mtx}}); @@ -375,9 +402,10 @@ void bustools_count(Bustools_opt &opt) { ecs.push_back(v[i].ec); intersect_genes_of_ecs(ecs, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (gn > 0) { - auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(ecs, ecmap, tx_split_lookup); if (opt.count_gene_multimapping) { for (auto x : glist) { column_vp.push_back({x, {v[i].count/gn, which_mtx}}); @@ -532,6 +560,7 @@ void bustools_count(Bustools_opt &opt) { ecs.resize(0); ecs.push_back(ec); intersect_genes_of_ecs(ecs, ec2genes, glist); + if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs, ec2genes_priority, glist); int gn = glist.size(); if (gn != 1) { continue; @@ -550,7 +579,7 @@ void bustools_count(Bustools_opt &opt) { } val += column_vp[j].second.first; } - auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split); + auto which_mtx = intersect_ecs_with_subset_txs(column_vp[i].first, ecmap, tx_split_lookup); auto& of_ = which_mtx == COUNT_DEFAULT ? of : (which_mtx == COUNT_SPLIT ? of_2 : of_A); auto& n_entries_ = which_mtx == COUNT_DEFAULT ? n_entries : (which_mtx == COUNT_SPLIT ? 
n_entries_2 : n_entries_A); of_ << n_rows << " " << (column_vp[i].first+1) << " " << val << "\n"; diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 24c8814..d19d01e 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -335,8 +335,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) int cm_flag = 0; int hist_flag = 0; int rawcounts_flag = 0; - int priority_one = 0; - int priority_two = 0; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"genemap", required_argument, 0, 'g'}, @@ -351,8 +349,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) {"downsample", required_argument, 0, 'd'}, {"rawcounts", no_argument, &rawcounts_flag, 1}, {"split", required_argument, 0, 's'}, - {"priority-1", no_argument, &priority_one, 1}, - {"priority-2", no_argument, &priority_two, 1}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -405,15 +401,6 @@ void parse_ProgramOptions_count(int argc, char **argv, Bustools_opt &opt) if (rawcounts_flag) { opt.count_raw_counts = true; } - if (priority_one) { - opt.count_mtx_priority = 1; - } - if (priority_two) { - opt.count_mtx_priority = 2; - } - if (priority_one && priority_two) { - opt.count_mtx_priority = -1; // Can't supply both, raise an error later - } while (optind < argc) opt.files.push_back(argv[optind++]); @@ -1767,15 +1754,6 @@ bool check_ProgramOptions_count(Bustools_opt &opt) } } - if (opt.count_mtx_priority == -1) { - std::cerr << "Error: Cannot specify multiply options for priority " << std::endl; - ret = false; - } - if (opt.count_mtx_priority > 0 && opt.count_split.size() == 0) { - std::cerr << "Error: Cannot use priority unless -s is specified " << std::endl; - ret = false; - } - return ret; } @@ -2664,8 +2642,6 @@ void Bustools_count_Usage() << " --em Estimate gene abundances using EM algorithm" << std::endl << " --cm Count multiplicites instead of UMIs" << std::endl << "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl - << " --priority-1 For --split, prioritize first matrix in split matrix for UMIs that multimap to both splits" << std::endl - << " --priority-2 For --split, prioritize second matrix in split matrix for UMIs that multimap to both splits" << std::endl << "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl << " --hist Output copy per UMI histograms for all genes" << std::endl << "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl From 7ef8da389cfdb8f86dd5be01ee8d310e09d53bbf Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 08:14:31 -0700 Subject: [PATCH 31/49] update count to output barcode prefix --- src/bustools_count.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index f155669..8852722 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -48,6 +48,7 @@ void bustools_count(Bustools_opt &opt) { std::string mtx_ofn_split_2 = opt.output + ".2.mtx"; std::string mtx_ofn_split_A = opt.output + ".ambiguous.mtx"; std::string barcodes_ofn = opt.output + ".barcodes.txt"; + std::string barcodes_prefix_ofn = opt.output + ".barcodes.prefix.txt"; std::string ec_ofn = opt.output + ".ec.txt"; std::string gene_ofn = opt.output + ".genes.txt"; std::string hist_ofn = opt.output + ".hist.txt"; @@ -691,11 +692,22 @@ void bustools_count(Bustools_opt &opt) { writeGenes(gene_ofn, genenames); } // write barcode 
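// ---------------------------------------------------------------------------
// Aside (not part of the patch): BUS barcodes are packed two bits per base
// into a uint64_t, and the prefix output added below keeps the low 2*bclen
// bits as the cell barcode while treating any higher bits as an extra prefix
// component. A sketch of that packing and masking with illustrative helper
// names (the real code uses bustools' stringToBinary/binaryToString):
#include <cstdint>
#include <string>

// Pack an ACGT string into 2 bits per base (A=0, C=1, G=2, T=3); assumes
// the string is at most 32 bases.
uint64_t pack_2bit(const std::string &s) {
  uint64_t r = 0;
  for (char c : s) {
    r <<= 2;
    r |= (c == 'A') ? 0ULL : (c == 'C') ? 1ULL : (c == 'G') ? 2ULL : 3ULL;
  }
  return r;
}

// Unpack the low `len` bases back into a string.
std::string unpack_2bit(uint64_t b, uint32_t len) {
  std::string s(len, 'N');
  for (uint32_t i = 0; i < len; ++i) {
    s[len - 1 - i] = "ACGT"[b & 3ULL];
    b >>= 2;
  }
  return s;
}

// Split a packed value into (prefix, barcode) for a barcode of length bclen;
// assumes bclen < 32 so the shift below is well defined.
void split_prefix(uint64_t packed, uint32_t bclen, uint64_t &prefix,
                  uint64_t &barcode) {
  const uint64_t len_mask = (1ULL << (2 * bclen)) - 1; // low 2*bclen bits
  barcode = packed & len_mask;
  prefix = packed >> (2 * bclen);
}
// --- end of aside; the patch text continues below ---------------------------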
file + bool write_prefix = false; std::ofstream bcof; bcof.open(barcodes_ofn); + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); for (const auto &x : barcodes) { + if (x != (x & len_mask)) write_prefix = true; bcof << binaryToString(x, bclen) << "\n"; } + if (write_prefix) { + std::ofstream bcprefixof; + bcprefixof.open(barcodes_prefix_ofn); + for (const auto &x : barcodes) { + bcprefixof << binaryToString(x >> (2*bclen), 32-bclen) << "\n"; + } + bcprefixof.close(); + } bcof.close(); //write histogram file From 5dc2bcf781b751a0d5cf9f51923427802d8cceb3 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 08:15:54 -0700 Subject: [PATCH 32/49] cleanup len_mask in bustools correct --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index bfeec77..c854e35 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -258,7 +258,7 @@ void bustools_split_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); @@ -534,7 +534,7 @@ void bustools_correct(Bustools_opt &opt) } int rc = 0; - uint64_t len_mask = (((uint64_t)1 << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen + uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { in.read((char *)p, N * sizeof(BUSData)); From 44d724c95e9e563639e7ead0a87ea0473d268473 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 22 Mar 2023 09:16:12 -0700 Subject: [PATCH 33/49] fix count_mtx_priority --- src/bustools_count.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 8852722..f3d1a19 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -25,7 +25,8 @@ void bustools_count(Bustools_opt &opt) { auto txnames_split = txnames; // copy std::vector tx_split; // Store transcript names for split std::vector tx_split_lookup; // Map transcript IDs to mtx status - int count_mtx_priority = !opt.count_gene_multimapping ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] + bool count_split = !opt.count_split.empty(); + int count_mtx_priority = !opt.count_gene_multimapping && count_split ? 
1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons] parseTranscripts(opt.count_txp, txnames); std::vector genemap(txnames.size(), -1); u_map_ genenames; @@ -40,7 +41,6 @@ void bustools_count(Bustools_opt &opt) { create_ec2genes(ecmap, genemap, ec2genes); - bool count_split = !opt.count_split.empty(); std::ofstream of; std::ofstream of_2; std::ofstream of_A; From f5e9de4daa2e70c83b97d3808f89720c40905237 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 27 Mar 2023 01:48:30 -0700 Subject: [PATCH 34/49] fix count_mtx_priority w/ UMI collision logic --- src/bustools_count.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index f3d1a19..6289545 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -184,7 +184,6 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); - if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { @@ -368,7 +367,6 @@ void bustools_count(Bustools_opt &opt) { for (size_t k = 0; k < ecs.size(); k++) { ecs_within_molecule.push_back(ecs[k]); intersect_genes_of_ecs(ecs_within_molecule, ec2genes, glist); - if (count_mtx_priority != 0 && glist.size() > 1) intersect_genes_of_ecs(ecs_within_molecule, ec2genes_priority, glist); if (glist.size() == 0) { ecs_within_molecule.pop_back(); } else { From c31d2f11c5bf7a847310f0759f803d9fbee9d480 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 21:29:04 -0700 Subject: [PATCH 35/49] Try multicomponent barcodes --- src/bustools_correct.cpp | 254 +++++++++++++++++++++++---------------- 1 file changed, 151 insertions(+), 103 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index c854e35..dc55e42 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -416,10 +416,9 @@ void bustools_split_correct(Bustools_opt &opt) p = nullptr; } -void bustools_correct(Bustools_opt &opt) -{ +void bustools_correct(Bustools_opt &opt) { uint32_t bclen = 0; - uint32_t wc_bclen = 0; + std::vector wc_bclen; uint32_t umilen = 0; BUSHeader h; size_t nr = 0; @@ -435,57 +434,94 @@ void bustools_correct(Bustools_opt &opt) bool dump_bool = opt.dump_bool; std::ofstream of; - if (dump_bool) - { + if (dump_bool) { of.open(opt.dump); } std::ifstream wf(opt.whitelist, std::ios::in); std::string line; line.reserve(100); - std::unordered_set wbc; - wbc.reserve(100000); + std::vector > wbc; // Each set contains whitelist uint32_t f = 0; - while (std::getline(wf, line)) - { - if (wc_bclen == 0) - { - wc_bclen = line.size(); + bool first_line = true; + while (std::getline(wf, line)) { + std::stringstream ss(line); + std::string barcode; + int i = 0; + while (ss >> barcode) { + std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); + uint64_t bc = stringToBinary(barcode, f); + if (first_line) { + std::unordered_set bc_set; + bc_set.insert(bc); + wbc.push_back(bc_set); + wbc[i].reserve(100000); + wc_bclen.push_back(barcode.size()); + } else if (i >= wbc.size()) { // Too many barcodes in this line + std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + << " barcodes on a line while " << 
wbc.size() << " barcodes on another line" + << std::endl; + exit(1); + } else if (barcode.length() != wc_bclen[i]) { + std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] + << " on a line while " << wbc[i].length() << " barcodes on another line" + << std::endl; + exit(1); + } else { + wbc[i].insert(bc); + } + i++; } - uint64_t bc = stringToBinary(line, f); - wbc.insert(bc); + if (i != wbc[i].size()) { // Incorrect number of barcodes on this line + std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + << " barcodes on a line while " << wbc.size() << " barcodes on another line" + << std::endl; + exit(1); + } + first_line = false; } wf.close(); - std::cerr << "Found " << wbc.size() << " barcodes in the whitelist" << std::endl; - - // split barcode into upper and lower half - size_t bc2 = (wc_bclen + 1) / 2; - - std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes - - uint64_t mask_size = (1ULL << (2 * bc2)); - uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; - uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; - for (uint64_t b : wbc) - { - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; - - correct[ub].second.add(lb); - correct[lb].first.add(ub); + if (wbc.size() == 0) { + std::cerr << "Error: whitelist file malformed; no barcodes found" < 1) { + std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; + } + + // split barcode into upper and lower half (across all barcodes in a barcode set) + std::vector>> correct_vec; // size of vector = how many barcode sets there are + std::vector > lower_upper_mask_vec; // size of vector = how many barcode sets there are + std::vector bc2_vec; // size of vector = how many barcode sets there are + for (int i = 0; i < wc_bclen.size() : i++) { + auto bclen2 = wc_bclen[i]; // i = index of current barcode set + size_t bc2 = (bclen2 + 1) / 2; + std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes + uint64_t mask_size = (1ULL << (2 * bc2)); + uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; + uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; + for (uint64_t b : wbc[i]) { // Iterate through barcodes of current barcode set + uint64_t lb = b & lower_mask; + uint64_t ub = (b >> (2 * bc2)) & upper_mask; + correct[ub].second.add(lb); + correct[lb].first.add(ub); + } + correct_vec.push_back(std::move(correct)); + lower_upper_mask_vec.push_back(std::make_pair(lower_mask, upper_mask)); + bc2_vec.push_back(bc2); } std::streambuf *buf = nullptr; std::ofstream busf_out; - if (!opt.stream_out) - { + if (!opt.stream_out) { busf_out.open(opt.output, std::ios::out | std::ios::binary); buf = busf_out.rdbuf(); } - else - { + else { buf = std::cout.rdbuf(); } std::ostream bus_out(buf); @@ -494,42 +530,39 @@ void bustools_correct(Bustools_opt &opt) nr = 0; BUSData bd; - for (const auto &infn : opt.files) - { + for (const auto &infn : opt.files) { std::streambuf *inbuf; std::ifstream inf; - if (!opt.stream_in) - { + if (!opt.stream_in) { inf.open(infn.c_str(), std::ios::binary); inbuf = inf.rdbuf(); - } - else - { + } else { inbuf = std::cin.rdbuf(); } std::istream in(inbuf); parseHeader(in, h); - if (!outheader_written) - { + if (!outheader_written) { writeHeader(bus_out, h); outheader_written = true; } - if (bclen == 0) - { + if (bclen == 0) { bclen = h.bclen; + size_t final_wc_bclen = 0; + + for (auto l : wc_bclen) { + final_wc_bclen += l; + } - if (bclen != wc_bclen) - { + if (bclen != final_wc_bclen) { std::cerr << "Error: barcode 
length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << wc_bclen << std::endl << " check that your whitelist matches the technology used" << std::endl; exit(1); } } - if (umilen == 0) - { + if (umilen == 0) { umilen = h.umilen; } @@ -539,67 +572,82 @@ void bustools_correct(Bustools_opt &opt) { in.read((char *)p, N * sizeof(BUSData)); size_t rc = in.gcount() / sizeof(BUSData); - if (rc == 0) - { + if (rc == 0) { break; } nr += rc; - for (size_t i = 0; i < rc; i++) - { + for (size_t i = 0; i < rc; i++) { bd = p[i]; - auto it = wbc.find(bd.barcode & len_mask); - if (it != wbc.end()) - { - stat_white++; - bus_out.write((char *)&bd, sizeof(bd)); - } - else - { - uint64_t b = bd.barcode & len_mask; - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; - uint64_t lbc = 0, ubc = 0; - int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); - int correct_upper = search_for_mismatch(correct[lb].first, wc_bclen - bc2, ub, ubc); - int nc = correct_lower + correct_upper; - if (nc != 1) - { - stat_uncorr++; + uint64_t b = bd.barcode & len_mask; + uint64_t running_len = 0; + size_t stat_white_ = 0; + size_t stat_uncorr_ = 0; + size_t stat_corr_ = 0; + uint64_t correction = 0; + std::vector > correction; // TODO: pair: first = corrected barcode; second = length + correction.resize(wbc.size()); + for (int j = wbc.size()-1; j >= 0; j--) { // Iterate through all the barcode sets + auto bclen2 = wc_bclen[j]; + running_len += bclen2; + uint64_t shift_len = 2*(running_len-bclen2); // used for masking out the least significant bits up to the current barcode + uint64_t len_mask2 = 0; // This mask = Only consider these bits (based on each barcode set) + len_mask2 = ((1ULL << (2*running_len)) - 1); + if (shift_len != 0) { + len_mask2 &= (~((1ULL << (shift_len)) - 1)); } - else if (nc == 1) - { - if (correct_lower == 1) - { - uint64_t b_corrected = (ub << (2 * bc2)) | lbc; - if (dump_bool) - { - if ((bd.barcode & len_mask) != old_barcode) - { - of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode & len_mask; - } + len_mask2 &= ((1ULL << (2*running_len)) - 1); + len_mask2 &= len_mask; // not necessary + b &= len_mask2; + auto it = wbc[j].find(b); + if (it != wbc[j].end()) { // Barcode is in the whitelist + stat_white_++; + correction |= (b & len_mask2); + } else { + auto lower_mask = lower_upper_mask_vec[j].first; + auto upper_mask = lower_upper_mask_vec[j].second; + auto bc2 = bc2_vec[j]; + auto& correct = correct_vec[j]; + uint64_t lb = b & lower_mask; + uint64_t ub = (b >> (2 * bc2)) & upper_mask; + uint64_t lbc = 0, ubc = 0; + int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); + int correct_upper = search_for_mismatch(correct[lb].first, bclen2 - bc2, ub, ubc); + int nc = correct_lower + correct_upper; + if (nc != 1) { // Uncorrected + stat_uncorr_++; + break; + } else if (nc == 1) { + stat_corr_++; + if (correct_lower == 1) { + uint64_t b_corrected = (ub << (2 * bc2)) | lbc; + b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected &= len_mask2; + correction |= b_corrected; // Add onto existing correction + } else if (correct_upper == 1) { + uint64_t b_corrected = (ubc << (2 * bc2)) | lb; + b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected &= len_mask2; + correction |= b_corrected; // Add onto existing correction } - - 
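// ---------------------------------------------------------------------------
// Aside (not part of the patch): the correction loop above relies on the
// half-split trick: if a barcode has exactly one mismatched base, then one of
// its two halves still matches an on-list entry exactly, so it is enough to
// look up candidates that share either half. A simplified, hash-map based
// sketch of that idea on 2-bit packed barcodes; the real code uses the
// compact correct[] tables and search_for_mismatch instead, and all names
// here are illustrative. Assumes barcode length <= 32.
#include <cstdint>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Number of mismatched bases between two packed barcodes of length len.
int base_mismatches(uint64_t a, uint64_t b, uint32_t len) {
  int d = 0;
  const uint64_t x = a ^ b;
  for (uint32_t i = 0; i < len; ++i) {
    if ((x >> (2 * i)) & 3ULL) ++d;
  }
  return d;
}

// Index the on-list barcodes by their lower `half` bases and upper bases.
void build_half_index(const std::unordered_set<uint64_t> &onlist, uint32_t half,
                      std::unordered_map<uint64_t, std::vector<uint64_t>> &by_lower,
                      std::unordered_map<uint64_t, std::vector<uint64_t>> &by_upper) {
  const uint64_t lower_mask = (1ULL << (2 * half)) - 1;
  for (uint64_t w : onlist) {
    by_lower[w & lower_mask].push_back(w);
    by_upper[w >> (2 * half)].push_back(w);
  }
}

// Correct q against the on-list allowing at most one mismatched base.
// Returns true and sets `corrected` only when the correction is unique.
bool correct_one_mismatch(uint64_t q, uint32_t len, uint32_t half,
                          const std::unordered_set<uint64_t> &onlist,
                          const std::unordered_map<uint64_t, std::vector<uint64_t>> &by_lower,
                          const std::unordered_map<uint64_t, std::vector<uint64_t>> &by_upper,
                          uint64_t &corrected) {
  if (onlist.count(q)) { corrected = q; return true; } // already on the list
  const uint64_t lower_mask = (1ULL << (2 * half)) - 1;
  std::vector<uint64_t> candidates;
  auto it = by_lower.find(q & lower_mask); // exact lower half, mismatch in upper
  if (it != by_lower.end()) candidates.insert(candidates.end(), it->second.begin(), it->second.end());
  it = by_upper.find(q >> (2 * half));     // exact upper half, mismatch in lower
  if (it != by_upper.end()) candidates.insert(candidates.end(), it->second.begin(), it->second.end());
  int hits = 0;
  for (uint64_t w : candidates) {
    if (base_mismatches(q, w, len) == 1) { corrected = w; ++hits; }
  }
  return hits == 1; // accept only a unique one-mismatch neighbour
}
// --- end of aside; the patch text continues below ---------------------------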
bd.barcode = b_corrected | (bd.barcode & ~len_mask); - bus_out.write((char *)&bd, sizeof(bd)); - stat_corr++; } - else if (correct_upper == 1) - { - uint64_t b_corrected = (ubc << (2 * bc2)) | lb; - if (dump_bool) - { - if ((bd.barcode & len_mask) != old_barcode) - { - of << binaryToString(bd.barcode & len_mask, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; - old_barcode = bd.barcode & len_mask; - } - } - - bd.barcode = b_corrected | (bd.barcode & ~len_mask); - bus_out.write((char *)&bd, sizeof(bd)); - stat_corr++; + } + } + if (stat_white_ == wbc.size()) { + stat_white++; + bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is + } + if (stat_uncorr_ == wbc.size()) { + stat_uncorr++; // Uncorrected; do not write BUS record + } + if (stat_corr_ > 0) { + stat_corr++; // Corrected; and write it out + bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length + bus_out.write((char *)&bd, sizeof(bd)); + if (dump_bool) { + if (b != old_barcode) { + of << binaryToString(b, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + old_barcode = b & len_mask; } } } From 49d69adb60d626f3286e2130ecf0dabd10702953 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 23:08:04 -0700 Subject: [PATCH 36/49] fixed stuff with multicomponent barcodes --- src/bustools_correct.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index dc55e42..ce9bb12 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -464,7 +464,7 @@ void bustools_correct(Bustools_opt &opt) { exit(1); } else if (barcode.length() != wc_bclen[i]) { std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] - << " on a line while " << wbc[i].length() << " barcodes on another line" + << " on a line but barcode length " << wc_bclen[i].length() << " on another line" << std::endl; exit(1); } else { @@ -496,7 +496,7 @@ void bustools_correct(Bustools_opt &opt) { std::vector>> correct_vec; // size of vector = how many barcode sets there are std::vector > lower_upper_mask_vec; // size of vector = how many barcode sets there are std::vector bc2_vec; // size of vector = how many barcode sets there are - for (int i = 0; i < wc_bclen.size() : i++) { + for (int i = 0; i < wc_bclen.size(); i++) { auto bclen2 = wc_bclen[i]; // i = index of current barcode set size_t bc2 = (bclen2 + 1) / 2; std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes @@ -556,7 +556,7 @@ void bustools_correct(Bustools_opt &opt) { } if (bclen != final_wc_bclen) { - std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << wc_bclen << std::endl + std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << final_wc_bclen << std::endl << " check that your whitelist matches the technology used" << std::endl; exit(1); @@ -585,8 +585,6 @@ void bustools_correct(Bustools_opt &opt) { size_t stat_uncorr_ = 0; size_t stat_corr_ = 0; uint64_t correction = 0; - std::vector > correction; // TODO: pair: first = corrected barcode; second = length - correction.resize(wbc.size()); for (int j = wbc.size()-1; j >= 0; j--) { // Iterate through all the barcode sets auto bclen2 = wc_bclen[j]; running_len += bclen2; @@ -646,7 +644,7 @@ void bustools_correct(Bustools_opt &opt) { bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) 
{ if (b != old_barcode) { - of << binaryToString(b, bclen) << "\t" << binaryToString(b_corrected, bclen) << "\n"; + of << binaryToString(b, bclen) << "\t" << binaryToString(correction, bclen) << "\n"; old_barcode = b & len_mask; } } From 8274a6fd1120a1b00a6c61cab0cf69879bfa9275 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 10 Apr 2023 23:27:39 -0700 Subject: [PATCH 37/49] more fixes --- src/bustools_correct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index ce9bb12..24a5526 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -464,7 +464,7 @@ void bustools_correct(Bustools_opt &opt) { exit(1); } else if (barcode.length() != wc_bclen[i]) { std::cerr << "Error: whitelist file malformed; encountered barcode length " << wc_bclen[i] - << " on a line but barcode length " << wc_bclen[i].length() << " on another line" + << " on a line but barcode length " << barcode.length() << " on another line" << std::endl; exit(1); } else { @@ -502,7 +502,7 @@ void bustools_correct(Bustools_opt &opt) { std::vector> correct(1ULL << (2 * bc2)); // 4^(bc/2) possible barcodes uint64_t mask_size = (1ULL << (2 * bc2)); uint64_t lower_mask = (1ULL << (2 * bc2)) - 1; - uint64_t upper_mask = (1ULL << (2 * (wc_bclen - bc2))) - 1; + uint64_t upper_mask = (1ULL << (2 * (bclen2 - bc2))) - 1; for (uint64_t b : wbc[i]) { // Iterate through barcodes of current barcode set uint64_t lb = b & lower_mask; uint64_t ub = (b >> (2 * bc2)) & upper_mask; From 8e70f28b0f7c57bb1974d114dbe36abf9a34929c Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 00:10:56 -0700 Subject: [PATCH 38/49] another fix --- src/bustools_correct.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 24a5526..80a06bd 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -472,8 +472,9 @@ void bustools_correct(Bustools_opt &opt) { } i++; } - if (i != wbc[i].size()) { // Incorrect number of barcodes on this line - std::cerr << "Error: whitelist file malformed; encountered " << (i+1) + if (i == 0) continue; // empty line + if (i != wbc.size()) { // Incorrect number of barcodes on this line + std::cerr << "Error: whitelist file malformed; encountered " << i << " barcodes on a line while " << wbc.size() << " barcodes on another line" << std::endl; exit(1); From 6a18f286d3a238925b827d5f892bd79855fbc104 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 02:53:21 -0700 Subject: [PATCH 39/49] some final fixes (hopefully) --- src/bustools_correct.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 80a06bd..84d70cd 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -597,18 +597,19 @@ void bustools_correct(Bustools_opt &opt) { } len_mask2 &= ((1ULL << (2*running_len)) - 1); len_mask2 &= len_mask; // not necessary - b &= len_mask2; - auto it = wbc[j].find(b); + uint64_t b_ = b & len_mask2; // The barcode alone in the location that it appears in + uint64_t b_shifted = b_ >> shift_len; // The barcode shifted into the least significant bits location + auto it = wbc[j].find(b_shifted); if (it != wbc[j].end()) { // Barcode is in the whitelist stat_white_++; - correction |= (b & len_mask2); + correction |= b_; } else { auto lower_mask = lower_upper_mask_vec[j].first; auto upper_mask = 
lower_upper_mask_vec[j].second; auto bc2 = bc2_vec[j]; auto& correct = correct_vec[j]; - uint64_t lb = b & lower_mask; - uint64_t ub = (b >> (2 * bc2)) & upper_mask; + uint64_t lb = b_shifted & lower_mask; + uint64_t ub = (b_shifted >> (2 * bc2)) & upper_mask; uint64_t lbc = 0, ubc = 0; int correct_lower = search_for_mismatch(correct[ub].second, bc2, lb, lbc); int correct_upper = search_for_mismatch(correct[lb].first, bclen2 - bc2, ub, ubc); @@ -620,33 +621,33 @@ void bustools_correct(Bustools_opt &opt) { stat_corr_++; if (correct_lower == 1) { uint64_t b_corrected = (ub << (2 * bc2)) | lbc; - b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected = b_corrected << shift_len; // We have the corrected barcode in the correct location b_corrected &= len_mask2; correction |= b_corrected; // Add onto existing correction } else if (correct_upper == 1) { uint64_t b_corrected = (ubc << (2 * bc2)) | lb; - b_corrected = b_corrected << (2*shift_len); // We have the corrected barcode in the correct location + b_corrected = b_corrected << shift_len; // We have the corrected barcode in the correct location b_corrected &= len_mask2; correction |= b_corrected; // Add onto existing correction } } } } - if (stat_white_ == wbc.size()) { + if (stat_uncorr_ == 0 && stat_white_ == wbc.size()) { stat_white++; bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is } if (stat_uncorr_ == wbc.size()) { stat_uncorr++; // Uncorrected; do not write BUS record } - if (stat_corr_ > 0) { + if (stat_uncorr_ == 0 && stat_corr_ > 0) { stat_corr++; // Corrected; and write it out bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) { if (b != old_barcode) { of << binaryToString(b, bclen) << "\t" << binaryToString(correction, bclen) << "\n"; - old_barcode = b & len_mask; + old_barcode = b; } } } @@ -659,13 +660,11 @@ void bustools_correct(Bustools_opt &opt) { << "Corrected = " << stat_corr << std::endl << "Uncorrected = " << stat_uncorr << std::endl; - if (!opt.stream_out) - { + if (!opt.stream_out) { busf_out.close(); } - if (opt.dump_bool) - { + if (opt.dump_bool) { of.close(); // if of is open } From 5d09ac19511dfcebf0d483ac4953e117e6d5ecc4 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 03:09:44 -0700 Subject: [PATCH 40/49] cleanup --- src/bustools_correct.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 84d70cd..4c4b9e5 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -633,14 +633,12 @@ void bustools_correct(Bustools_opt &opt) { } } } - if (stat_uncorr_ == 0 && stat_white_ == wbc.size()) { + if (stat_uncorr_ > 0) { + stat_uncorr++; // Uncorrected; do not write BUS record + } else if (stat_white_ == wbc.size()) { stat_white++; bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is - } - if (stat_uncorr_ == wbc.size()) { - stat_uncorr++; // Uncorrected; do not write BUS record - } - if (stat_uncorr_ == 0 && stat_corr_ > 0) { + } else if (stat_corr_ > 0) { stat_corr++; // Corrected; and write it out bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length bus_out.write((char *)&bd, sizeof(bd)); From 88ffe8d351b98a7be5fff3cc0fd952e8caca4529 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan 
Date: Tue, 11 Apr 2023 04:21:58 -0700 Subject: [PATCH 41/49] make multipart barcodes more lax/flexible --- src/bustools_correct.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 4c4b9e5..22e5054 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -451,15 +451,18 @@ void bustools_correct(Bustools_opt &opt) { while (ss >> barcode) { std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); uint64_t bc = stringToBinary(barcode, f); - if (first_line) { + if (first_line) { // First line establishes all the barcode sets (can't have any empty barcodes here) std::unordered_set bc_set; bc_set.insert(bc); wbc.push_back(bc_set); wbc[i].reserve(100000); wc_bclen.push_back(barcode.size()); + } else if (barcode == "-") { + i++; + continue; // Empty barcode } else if (i >= wbc.size()) { // Too many barcodes in this line std::cerr << "Error: whitelist file malformed; encountered " << (i+1) - << " barcodes on a line while " << wbc.size() << " barcodes on another line" + << " barcodes on a line while " << wbc.size() << " barcodes on a previous line" << std::endl; exit(1); } else if (barcode.length() != wc_bclen[i]) { @@ -473,12 +476,6 @@ void bustools_correct(Bustools_opt &opt) { i++; } if (i == 0) continue; // empty line - if (i != wbc.size()) { // Incorrect number of barcodes on this line - std::cerr << "Error: whitelist file malformed; encountered " << i - << " barcodes on a line while " << wbc.size() << " barcodes on another line" - << std::endl; - exit(1); - } first_line = false; } wf.close(); From d04a222f400543a35fe21fe0ff803e758656b2a9 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 11 Apr 2023 04:48:59 -0700 Subject: [PATCH 42/49] Update bustools_correct.cpp --- src/bustools_correct.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 22e5054..ff2180d 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -484,8 +484,13 @@ void bustools_correct(Bustools_opt &opt) { std::cerr << "Error: whitelist file malformed; no barcodes found" < 1) { std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; } From 58d547720ab72e357b687c565726caf8293d0a39 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Sun, 23 Apr 2023 18:33:47 -0700 Subject: [PATCH 43/49] bustools correct --replace: initial implementation --- src/Common.hpp | 1 + src/bustools_correct.cpp | 169 +++++++++++++++++++++++++++++++++++++-- src/bustools_main.cpp | 12 ++- 3 files changed, 171 insertions(+), 11 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index 6a50ebd..b0282cc 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -85,6 +85,7 @@ struct Bustools_opt std::string dump; bool dump_bool = false; bool split_correct = false; + bool barcode_replacement = false; /* predict */ std::string predict_input; //specified the same way as the output for count - count and histogram filenames will be created from this diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index ff2180d..4019889 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "Common.hpp" @@ -416,7 +417,161 @@ void bustools_split_correct(Bustools_opt &opt) p = nullptr; } +void bustools_correct_replace(Bustools_opt &opt) { + uint32_t bclen = 0; + uint32_t umilen = 0; + std::unordered_set wc_bclen; + BUSHeader h; + size_t nr = 0; + size_t 
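// ---------------------------------------------------------------------------
// Aside (not part of the patch): at its core, the --replace mode parsed below
// builds an exact-match table from on-list barcode to replacement barcode and
// rewrites records whose barcode appears in the table. A minimal sketch of
// that lookup step on a simplified record type; names are illustrative, and
// how unmatched records are handled (kept, dropped, or counted as
// uncorrected) is left to the real implementation; here they are simply
// left unchanged.
#include <cstdint>
#include <unordered_map>
#include <vector>

struct BusRec {
  uint64_t barcode;
  uint64_t umi;
  int32_t ec;
  uint32_t count;
};

// Rewrite barcodes that have a replacement; returns how many were replaced.
size_t apply_replacements(std::vector<BusRec> &recs,
                          const std::unordered_map<uint64_t, uint64_t> &rp_map) {
  size_t replaced = 0;
  for (BusRec &r : recs) {
    auto it = rp_map.find(r.barcode);
    if (it != rp_map.end()) {
      r.barcode = it->second; // exact match: swap in the replacement barcode
      ++replaced;
    }
  }
  return replaced;
}
// --- end of aside; the patch text continues below ---------------------------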
N = 100000; + BUSData *p = new BUSData[N]; + char magic[4]; + uint32_t version = 0; + size_t stat_white = 0; + size_t stat_uncorr = 0; + uint64_t old_barcode; + + bool dump_bool = opt.dump_bool; + std::ofstream of; + if (dump_bool) { + of.open(opt.dump); + } + + std::ifstream wf(opt.whitelist, std::ios::in); + std::string line; + line.reserve(100); + std::unordered_map> rp_map; // Replacement map (key = onlisted bc; pair.first = replacement bc; pair.second = type) + uint32_t f = 0; + + while (std::getline(wf, line)) { + std::stringstream ss(line); + std::string barcode; + std::string replacement; + ss >> barcode >> replacement; + if (barcode.empty() || replacement.empty()) continue; + if (!barcode.empty() && replacement.empty()) { + std::cerr << "Error: replacement file malformed; no replacement found for barcode: " << barcode << std::endl; + exit(1); + } + std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); + std::transform(replacement.begin(), replacement.end(), replacement.begin(), ::toupper); + uint64_t bc = stringToBinary(barcode, f); + uint64_t rp = stringToBinary(replacement, f); + rp_map.insert(std::make_pair(bc, std::make_pair(rp,0))); + wc_bclen.insert(barcode.length()); + wc_bclen.insert(replacement.length()); + } + wf.close(); + + if (rp_map.size() == 0) { + std::cerr << "Error: replacement file malformed; no barcodes found" < 1) { std::cerr << "Found " << wbc.size() << " barcode sets" << std::endl; } @@ -559,8 +714,8 @@ void bustools_correct(Bustools_opt &opt) { } if (bclen != final_wc_bclen) { - std::cerr << "Error: barcode length and whitelist length differ, barcodes = " << bclen << ", whitelist = " << final_wc_bclen << std::endl - << " check that your whitelist matches the technology used" << std::endl; + std::cerr << "Error: barcode length and on-list length differ, barcodes = " << bclen << ", on-list = " << final_wc_bclen << std::endl + << " check that your on-list matches the technology used" << std::endl; exit(1); } @@ -656,7 +811,7 @@ void bustools_correct(Bustools_opt &opt) { } std::cerr << "Processed " << nr << " BUS records" << std::endl - << "In whitelist = " << stat_white << std::endl + << "In on-list = " << stat_white << std::endl << "Corrected = " << stat_corr << std::endl << "Uncorrected = " << stat_uncorr << std::endl; diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index d19d01e..580a810 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -576,13 +576,14 @@ void parse_ProgramOptions_fromtext(int argc, char **argv, Bustools_opt& opt) { void parse_ProgramOptions_correct(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:w:d:sp"; + const char *opt_string = "o:w:d:spr"; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"whitelist", required_argument, 0, 'w'}, {"dump", required_argument, 0, 'd'}, {"split", no_argument, 0, 's'}, {"pipe", no_argument, 0, 'p'}, + {"replace", no_argument, 0, 'r'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -608,6 +609,9 @@ void parse_ProgramOptions_correct(int argc, char **argv, Bustools_opt &opt) case 'p': opt.stream_out = true; break; + case 'r': + opt.barcode_replacement = true; + break; default: break; } @@ -1604,7 +1608,7 @@ bool check_ProgramOptions_correct(Bustools_opt &opt) if (opt.whitelist.size() == 0) { - std::cerr << "Error: Missing whitelist file" << std::endl; + std::cerr << "Error: Missing on-list file" << std::endl; ret = false; } else @@ -2621,10 +2625,10 @@ void Bustools_correct_Usage() << std::endl << "Options: " << 
std::endl << "-o, --output File for corrected bus output" << std::endl - << "-w, --whitelist File of whitelisted barcodes to correct to" << std::endl + << "-w, --whitelist File of on-list barcodes to correct to" << std::endl << "-p, --pipe Write to standard output" << std::endl << "-d, --dump Dump uncorrected to corrected barcodes (optional)" << std::endl - << "-s, --split Split the whitelist and correct each half independently (optional)" << std::endl + << "-r, --replace The file of on-list barcodes is a barcode replacement file" << std::endl << std::endl; } From 61f988349aaf0eaeb99edf6feb9accc7e9dd83b1 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 24 Apr 2023 04:47:16 -0700 Subject: [PATCH 44/49] bustools correct: more features for replace --- src/Common.hpp | 1 + src/bustools_correct.cpp | 88 ++++++++++++++++++++++++++++++++++++---- src/bustools_main.cpp | 7 +++- src/bustools_text.cpp | 3 ++ 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/Common.hpp b/src/Common.hpp index b0282cc..e6cfb27 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -109,6 +109,7 @@ struct Bustools_opt /* text */ bool text_dumpflags = false; bool text_dumppad = false; + bool text_showall = false; /* linker */ int start, end; diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 4019889..0b2edc2 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -420,7 +420,9 @@ void bustools_split_correct(Bustools_opt &opt) void bustools_correct_replace(Bustools_opt &opt) { uint32_t bclen = 0; uint32_t umilen = 0; + uint32_t rplen = 0; std::unordered_set wc_bclen; + std::unordered_set r_len; BUSHeader h; size_t nr = 0; size_t N = 100000; @@ -430,6 +432,15 @@ void bustools_correct_replace(Bustools_opt &opt) { size_t stat_white = 0; size_t stat_uncorr = 0; uint64_t old_barcode; + enum replacement_type { bc_record, msb_meta, lsb_meta, msb_bus, lsb_bus }; + + // There are five replacement types: + // bc_record: simply replace the barcode of length 'bclen' with another barcode of length 'bclen' (while preserving metadata) + // msb_meta: put replacement (format: NNNN<) into most significant bits of metadata in BUS record (preserve barcode record) + // lsb_meta: put replacement (format: second.first; - bd.barcode = correction | (bd.barcode & ~len_mask); // Correction plus preserve the metadata bits outside barcode length + auto rtype = it->second.second; + switch (rtype) { + case bc_record: + { + uint64_t len_mask2 = ((1ULL << (2*std::max(rplen, bclen))) - 1); // n least significant bits where n=2*max(rplen,bclen) [if rplen > bclen, overwrite based on rplen] + bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction plus preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + break; + } + case msb_meta: + { + uint64_t clen = 32-rplen; // 32 minus correction length + bd.barcode = bd.barcode & ((1ULL << (2*clen)) - 1); // Unset the MSBs + bd.barcode = (correction << (2*clen) ) | bd.barcode; // Shift the corrected sequence into the MSBs + break; + } + case lsb_meta: + { + uint64_t original_bc = bd.barcode & len_mask; // The original barcode sequence (no metadata) + bd.barcode = bd.barcode << (2*rplen); // Shift the barcode+metadata to the left based on rplen + uint64_t mlen = (rplen+bclen); // How much space the new metadata plus the original barcode will take up + bd.barcode = bd.barcode & (~((1ULL << (2*mlen)) - 1)); // Preserve only the bits containing the (shifted) metadata + bd.barcode = bd.barcode 
| original_bc; // Throw the original barcode back in + bd.barcode = (correction << (2*bclen) ) | bd.barcode; // Throw the new metadata in + break; + } + case msb_bus: + { + if (bclen >= rplen) { // Only do the substitution if barcode encapsulates the replacement + uint64_t mdata = bd.barcode & (~((1ULL << (2*bclen)) - 1)); // Preserve only the bits containing the metadata (not the barcode) + uint64_t bdata = bd.barcode & ((1ULL << (2*(bclen-rplen))) - 1); // Preserve only the bits containing the part of the barcode we want to keep + bd.barcode = (mdata | bdata) | (correction << (2*(bclen-rplen))); // Merge everything together + } + break; + } + case lsb_bus: + { + bd.barcode = correction | (bd.barcode & (~(1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + break; + } + } bus_out.write((char *)&bd, sizeof(bd)); if (dump_bool) { if (b != old_barcode) { diff --git a/src/bustools_main.cpp b/src/bustools_main.cpp index 580a810..fe85131 100644 --- a/src/bustools_main.cpp +++ b/src/bustools_main.cpp @@ -493,13 +493,14 @@ void parse_ProgramOptions_predict(int argc, char **argv, Bustools_opt& opt) { void parse_ProgramOptions_dump(int argc, char **argv, Bustools_opt &opt) { - const char *opt_string = "o:pfd"; + const char *opt_string = "o:pfda"; static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {"pipe", no_argument, 0, 'p'}, {"flags", no_argument, 0, 'f'}, {"pad", no_argument, 0, 'd'}, + {"showAll", no_argument, 0, 'a'}, {0, 0, 0, 0}}; int option_index = 0, c; @@ -521,6 +522,9 @@ void parse_ProgramOptions_dump(int argc, char **argv, Bustools_opt &opt) case 'd': opt.text_dumppad = true; break; + case 'a': + opt.text_showall = true; + break; default: break; } @@ -2607,6 +2611,7 @@ void Bustools_dump_Usage() << "-f, --flags Write the flag column" << std::endl << "-d, --pad Write the pad column" << std::endl << "-p, --pipe Write to standard output" << std::endl + << "-a, --showAll Show hidden metadata in barcodes" << std::endl << std::endl; } diff --git a/src/bustools_text.cpp b/src/bustools_text.cpp index 2c86d3e..9eff0dc 100644 --- a/src/bustools_text.cpp +++ b/src/bustools_text.cpp @@ -46,6 +46,9 @@ void bustools_text(const Bustools_opt& opt) { parseHeader(in, h); uint32_t bclen = h.bclen; uint32_t umilen = h.umilen; + if (opt.text_showall) { + bclen = 32; + } int rc = 0; while (true) { in.read((char*)p, N * sizeof(BUSData)); From f4fd12a5205772eb7e62a04a7ebd8b40835805b8 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Mon, 24 Apr 2023 05:22:15 -0700 Subject: [PATCH 45/49] fix minor bug --- src/bustools_correct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index 0b2edc2..bf4093a 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -602,7 +602,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } case lsb_bus: { - bd.barcode = correction | (bd.barcode & (~(1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in break; } } From 01f1ac5bee0508159895e88a9010858297550ef2 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Tue, 25 Apr 2023 01:25:36 -0700 Subject: [PATCH 46/49] fix bustools correct/replace --- src/bustools_correct.cpp | 103 ++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp 
index bf4093a..f636f49 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -421,8 +421,7 @@ void bustools_correct_replace(Bustools_opt &opt) { uint32_t bclen = 0; uint32_t umilen = 0; uint32_t rplen = 0; - std::unordered_set wc_bclen; - std::unordered_set r_len; + uint32_t wc_bclen = 0; BUSHeader h; size_t nr = 0; size_t N = 100000; @@ -440,7 +439,7 @@ void bustools_correct_replace(Bustools_opt &opt) { // lsb_meta: put replacement (format: > rp_map; // Replacement map (key = onlisted bc; pair.first = replacement bc; pair.second = type) + std::unordered_map rp_map; // Replacement map (key = onlisted bc; value = replacement bc) + replacement_type rtype = bc_record; uint32_t f = 0; while (std::getline(wf, line)) { @@ -466,29 +466,46 @@ void bustools_correct_replace(Bustools_opt &opt) { } std::transform(barcode.begin(), barcode.end(), barcode.begin(), ::toupper); std::transform(replacement.begin(), replacement.end(), replacement.begin(), ::toupper); - replacement_type rtype = bc_record; - if (replacement[0] == '<') rtype = lsb_meta; - if (replacement[0] == '*') rtype = lsb_bus; - if (rtype != bc_record) { + replacement_type rtype_ = bc_record; + if (replacement[0] == '<') rtype_ = lsb_meta; + if (replacement[0] == '*') rtype_ = lsb_bus; + if (rtype_ != bc_record) { replacement = replacement.substr(1); } else { - if (replacement[replacement.length()-1] == '<') rtype = msb_meta; - if (replacement[replacement.length()-1] == '*') rtype = msb_bus; - if (rtype != bc_record) { + if (replacement[replacement.length()-1] == '<') rtype_ = msb_meta; + if (replacement[replacement.length()-1] == '*') rtype_ = msb_bus; + if (rtype_ != bc_record) { replacement = replacement.substr(0, replacement.length()-1); } } if (replacement.length() == 0) continue; + + if (wc_bclen == 0) { + rtype = rtype_; + wc_bclen = barcode.length(); + rplen = replacement.length(); + } + if (rtype != rtype_) { + std::cerr << "Error: Replacement types not consistent in file" << std::endl; + exit(1); + } + if (wc_bclen != barcode.length()) { + std::cerr << "Error: Barcode lengths not consistent in file" << std::endl; + exit(1); + } + uint64_t rl = replacement.length(); uint64_t bc = stringToBinary(barcode, f); uint64_t rp = stringToBinary(replacement, f); if (rp_map.find(bc) != rp_map.end()) { std::cerr << "Error: Duplicate entries found: " << barcode << std::endl; exit(1); } - rp_map.insert(std::make_pair(bc, std::make_pair(rp,rtype))); - wc_bclen.insert(barcode.length()); - r_len.insert(replacement.length()); + if (rplen != rl) { + std::cerr << "Error: replacement length in list inconsistent" << std::endl; + exit(1); + } + rp_map.insert(std::make_pair(bc, rp)); } wf.close(); @@ -496,6 +513,7 @@ void bustools_correct_replace(Bustools_opt &opt) { std::cerr << "Error: replacement file malformed; no barcodes found" < rplen) { + h.bclen = rplen; } - rplen = *(r_len.begin()); } if (umilen == 0) { umilen = h.umilen; } + if (!outheader_written) { + writeHeader(bus_out, h); + outheader_written = true; + } + int rc = 0; uint64_t len_mask = ((1ULL << (2*bclen)) - 1); // Only include n least significant bits where n=2*bclen while (true) { @@ -562,16 +575,31 @@ void bustools_correct_replace(Bustools_opt &opt) { bd = p[i]; uint64_t b = bd.barcode & len_mask; uint64_t correction = 0; - auto it = rp_map.find(b); + uint64_t b_lookup = b; + if (rtype == lsb_bus || rtype == msb_meta || rtype == lsb_meta) { + // For these, look up based off LSBs (off of wc_bclen) + b_lookup = b_lookup & ((1ULL << (2*wc_bclen)) - 1); + } + if (rtype == 
msb_bus) { + // For this, look up based off MSBs (from beginning of barcode) + if (bclen >= rplen) { + b_lookup = b_lookup & (~(1ULL << (2*(bclen-rplen)))); + } + } + auto it = rp_map.find(b_lookup); if (it != rp_map.end()) { stat_white++; - correction = it->second.first; - auto rtype = it->second.second; + correction = it->second; switch (rtype) { - case bc_record: + case bc_record: // This is the only option where we'll allow replacement to be shorter than barcode { uint64_t len_mask2 = ((1ULL << (2*std::max(rplen, bclen))) - 1); // n least significant bits where n=2*max(rplen,bclen) [if rplen > bclen, overwrite based on rplen] - bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction plus preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + bd.barcode = bd.barcode & ~len_mask2; // Preserve the metadata bits outside barcode length (or overwrites the part where the barcode length exceeds it) + if (rplen < bclen) { + bd.barcode = bd.barcode >> (2*(bclen-rplen)); + bd.barcode = bd.barcode & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. where the correction will eventually be) + } + bd.barcode = correction | (bd.barcode & ~len_mask2); // Correction break; } case msb_meta: @@ -602,7 +630,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } case lsb_bus: { - bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1));// Set 2*rplen LSBs to 0, and put the new replacement in + bd.barcode = correction | (bd.barcode & ~((1ULL << (2*rplen)) - 1)); // Set 2*rplen LSBs to 0, and put the new replacement in break; } } @@ -614,7 +642,14 @@ void bustools_correct_replace(Bustools_opt &opt) { } } } else { - bus_out.write((char *)&bd, sizeof(bd)); // No correction; just write BUS record as-is + // No correction; except shift metadata right if necessary + if (rplen < bclen) { + uint64_t shifted_bc = bd.barcode >> (2*(bclen-rplen)); + shifted_bc = shifted_bc & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. where the replacement would be) + bd.barcode = (bd.barcode & ((1ULL << (2*(rplen))) - 1)); // Preserve only the LSB rlen stuff + bd.barcode = shifted_bc | bd.barcode; // Merge + } + bus_out.write((char *)&bd, sizeof(bd)); stat_uncorr++; } } From 676f1065c5e3983c1533df99d26328cd50e5a5cc Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Wed, 26 Apr 2023 20:01:42 -0700 Subject: [PATCH 47/49] fix bustools correct replace --- src/bustools_correct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_correct.cpp b/src/bustools_correct.cpp index f636f49..ef50271 100644 --- a/src/bustools_correct.cpp +++ b/src/bustools_correct.cpp @@ -643,7 +643,7 @@ void bustools_correct_replace(Bustools_opt &opt) { } } else { // No correction; except shift metadata right if necessary - if (rplen < bclen) { + if (rtype == bc_record && rplen < bclen) { uint64_t shifted_bc = bd.barcode >> (2*(bclen-rplen)); shifted_bc = shifted_bc & ~((1ULL << (2*(rplen))) - 1); // Delete everything within rplen (i.e. 
where the replacement would be) bd.barcode = (bd.barcode & ((1ULL << (2*(rplen))) - 1)); // Preserve only the LSB rlen stuff From 31b90d8fb15b97a6182161580514ff8da0298527 Mon Sep 17 00:00:00 2001 From: Pall Melsted Date: Wed, 31 May 2023 13:47:24 +0000 Subject: [PATCH 48/49] version bump --- src/Common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common.hpp b/src/Common.hpp index e6cfb27..badd7c4 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -12,7 +12,7 @@ #include "roaring.hh" #include "hash.hpp" -#define BUSTOOLS_VERSION "0.42.0" +#define BUSTOOLS_VERSION "0.43.0" #define u_map_ std::unordered_map enum CAPTURE_TYPE : char From 7a11c5a2e4fd5369e232050929de8415b9bf49c7 Mon Sep 17 00:00:00 2001 From: Delaney Sullivan Date: Thu, 29 Jun 2023 15:41:36 -0700 Subject: [PATCH 49/49] update bustools count prefix to always be len 16 --- src/bustools_count.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bustools_count.cpp b/src/bustools_count.cpp index 6289545..8ccb6a8 100644 --- a/src/bustools_count.cpp +++ b/src/bustools_count.cpp @@ -702,7 +702,7 @@ void bustools_count(Bustools_opt &opt) { std::ofstream bcprefixof; bcprefixof.open(barcodes_prefix_ofn); for (const auto &x : barcodes) { - bcprefixof << binaryToString(x >> (2*bclen), 32-bclen) << "\n"; + bcprefixof << binaryToString(x >> (2*bclen), 16) << "\n"; // Always make prefix length 16 } bcprefixof.close(); }
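
For orientation, a minimal standalone sketch of the bit arithmetic behind the --replace modes introduced in patches 43-46, assuming the usual 2-bit-per-base packing (A=0, C=1, G=2, T=3, most-significant base first) used by stringToBinary/binaryToString. The helper names below are illustrative, not bustools functions, and the bc_record case shown is the simple one (replacement at least as long as the barcode), without the extra right-shift patch 46 adds for shorter replacements.

// Illustrative sketch only -- not bustools source. Assumes 2-bit-per-base
// packing (A=0, C=1, G=2, T=3), most-significant base first.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>

// Pack a base string into the low bits of a 64-bit word.
static uint64_t pack_bases(const std::string &s) {
  uint64_t r = 0;
  for (char c : s) {
    r <<= 2;
    if (c == 'C') r |= 1; else if (c == 'G') r |= 2; else if (c == 'T') r |= 3;
  }
  return r;
}

// bc_record (replacement at least as long as the barcode): overwrite the low
// 2*max(rplen,bclen) bits with the replacement, preserving any higher metadata bits.
static uint64_t replace_bc_record(uint64_t bc, uint64_t rp, uint32_t bclen, uint32_t rplen) {
  uint64_t mask = (1ULL << (2 * std::max(rplen, bclen))) - 1;
  return rp | (bc & ~mask);
}

// lsb_bus: overwrite only the 2*rplen least significant bits of the field.
static uint64_t replace_lsb_bus(uint64_t bc, uint64_t rp, uint32_t rplen) {
  return rp | (bc & ~((1ULL << (2 * rplen)) - 1));
}

// msb_meta: clear the top 2*rplen bits of the field and shift the replacement
// into them, leaving the low 2*(32-rplen) bits (barcode included) untouched.
static uint64_t replace_msb_meta(uint64_t bc, uint64_t rp, uint32_t rplen) {
  uint32_t keep = 32 - rplen;                   // bases kept in the low bits
  bc &= (1ULL << (2 * keep)) - 1;               // unset the MSBs
  return (rp << (2 * keep)) | bc;               // shift the replacement on top
}

int main() {
  uint32_t bclen = 4, rplen = 4;
  uint64_t bc = pack_bases("ACGT");             // 0x1b
  uint64_t rp = pack_bases("TTTT");             // 0xff
  std::cout << std::hex
            << replace_bc_record(bc, rp, bclen, rplen) << "\n"   // ff
            << replace_lsb_bus(bc, rp, rplen) << "\n"            // ff
            << replace_msb_meta(bc, rp, rplen) << "\n";          // ff0000000000001b
}

In the replacement file itself (per the comments in patches 43-44), a bare second column requests a bc_record swap; a '<' marker places the replacement in the metadata bits and a '*' marker in the barcode bits, with a leading marker selecting the least-significant position (<NNNN, *NNNN) and a trailing marker the most-significant (NNNN<, NNNN*). Patch 46 additionally requires the mode, the barcode length, and the replacement length to be consistent across lines. A run would look roughly like `bustools correct --replace -w replace.txt -o out.bus in.bus` (file names are placeholders; positional-argument handling follows the existing correct subcommand).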
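
Relatedly, for the new -a/--showAll dump flag (patch 44) and the fixed 16-base prefix written by bustools count (patch 49): the sketch below, assuming the same base order as binaryToString, shows how the 64-bit field splits into the bclen-base barcode in the low bits and the 16-base metadata prefix printed from the bits above it. unpack_bases and the variable names are illustrative only, not bustools code.

// Illustrative sketch only -- not bustools source.
#include <cstdint>
#include <iostream>
#include <string>

// Decode len bases from the low 2*len bits, most-significant base first
// (the convention assumed here for binaryToString).
static std::string unpack_bases(uint64_t x, uint32_t len) {
  std::string s(len, 'A');
  for (uint32_t i = 0; i < len; ++i) {
    s[len - 1 - i] = "ACGT"[(x >> (2 * i)) & 3];
  }
  return s;
}

int main() {
  uint32_t bclen = 16;                      // barcode length from the BUS header
  uint64_t field = 0xAAAAAAAA00000000ULL;   // example packed prefix + barcode
  // Low 2*bclen bits: the barcode as bustools text normally prints it.
  std::cout << unpack_bases(field & ((1ULL << (2 * bclen)) - 1), bclen) << "\n";
  // Bits above the barcode: the prefix, always written as 16 bases (patch 49).
  std::cout << unpack_bases(field >> (2 * bclen), 16) << "\n";
  // With --showAll, bustools text treats bclen as 32 and prints both together.
  std::cout << unpack_bases(field, 32) << "\n";
}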