From 84a4bdf9ee790390ffaea5d8eeebfbb52f61f8c7 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Fri, 9 Jun 2023 16:30:57 +0200
Subject: [PATCH] Code for locating triples in an existing index

This is the first part of a series of PRs split off from the large
proof-of-concept PR https://github.com/ad-freiburg/qlever/pull/916,
which realizes SPARQL 1.1 Update
---
 src/global/IdTriple.h          |  18 ++
 src/index/CMakeLists.txt       |   1 +
 src/index/CompressedRelation.h |   1 +
 src/index/IndexMetaData.h      |  20 +-
 src/index/LocatedTriples.cpp   | 349 +++++++++++++++++++++++++++++++++
 src/index/LocatedTriples.h     | 196 ++++++++++++++++++
 src/index/MetaDataHandler.h    |  51 +++--
 test/CMakeLists.txt            |   2 +
 test/LocatedTriplesTest.cpp    | 173 ++++++++++++++++
 9 files changed, 789 insertions(+), 22 deletions(-)
 create mode 100644 src/global/IdTriple.h
 create mode 100644 src/index/LocatedTriples.cpp
 create mode 100644 src/index/LocatedTriples.h
 create mode 100644 test/LocatedTriplesTest.cpp
diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h
new file mode 100644
index 0000000000..0353b8c747
--- /dev/null
+++ b/src/global/IdTriple.h
@@ -0,0 +1,18 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#pragma once
+
+#include <array>
+
+#include "global/Id.h"
+
+// Should we have our own class for this? We need this in several places.
+using IdTriple = std::array<Id, 3>;
+
+// Hash value for such a triple (combines the three `Id`s in order).
+template <typename H>
+H AbslHashValue(H h, const IdTriple& triple) {
+  return H::combine(std::move(h), triple[0], triple[1], triple[2]);
+}
diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt
index 4bbf53f647..fd65af2bd4 100644
--- a/src/index/CMakeLists.txt
+++ b/src/index/CMakeLists.txt
@@ -8,6 +8,7 @@ add_library(index
         VocabularyOnDisk.h VocabularyOnDisk.cpp
         IndexMetaData.h IndexMetaDataImpl.h
         MetaDataHandler.h
+        LocatedTriples.h LocatedTriples.cpp
         StxxlSortFunctors.h
         TextMetaData.cpp TextMetaData.h
         DocsDB.cpp DocsDB.h
diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h
index 3c6c5df80a..63d39a28ba 100644
--- a/src/index/CompressedRelation.h
+++ b/src/index/CompressedRelation.h
@@ -305,6 +305,7 @@ class CompressedRelationReader {
   static void decompressColumn(const std::vector<char>& compressedColumn,
                                size_t numRowsToRead, Iterator iterator);
 
+ public:
   // Read the block that is identified by the `blockMetaData` from the `file`,
   // decompress and return it.
   // If `columnIndices` is `nullopt`, then all columns of the block are read,
diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h
index 4e3ef4b38f..3039c0ba28 100644
--- a/src/index/IndexMetaData.h
+++ b/src/index/IndexMetaData.h
@@ -1,6 +1,7 @@
 // Copyright 2015, University of Freiburg,
 // Chair of Algorithms and Data Structures.
 // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
+
 #pragma once
 
 #include <stdio.h>
@@ -13,14 +14,14 @@
 #include <utility>
 #include <vector>
 
-#include "../global/Id.h"
-#include "../util/File.h"
-#include "../util/HashMap.h"
-#include "../util/MmapVector.h"
-#include "../util/ReadableNumberFact.h"
-#include "../util/Serializer/Serializer.h"
-#include "./MetaDataHandler.h"
-#include "CompressedRelation.h"
+#include "global/Id.h"
+#include "index/CompressedRelation.h"
+#include "index/MetaDataHandler.h"
+#include "util/File.h"
+#include "util/HashMap.h"
+#include "util/MmapVector.h"
+#include "util/ReadableNumberFact.h"
+#include "util/Serializer/Serializer.h"
 
 using std::array;
 using std::pair;
@@ -86,7 +87,10 @@ class IndexMetaData {
   // name and the variable name are terrible.
 
   // For each relation, its meta data.
+ public:
   MapType _data;
+
+ private:
   // For each compressed block, its meta data.
   BlocksType _blockData;
 
diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp
new file mode 100644
index 0000000000..acd6988675
--- /dev/null
+++ b/src/index/LocatedTriples.cpp
@@ -0,0 +1,349 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#include "index/LocatedTriples.h"
+
+#include <algorithm>
+
+#include "index/CompressedRelation.h"
+#include "index/IndexMetaData.h"
+#include "index/Permutations.h"
+
+// ____________________________________________________________________________
+LocatedTriple LocatedTriple::locateTripleInPermutation(
+    Id id1, Id id2, Id id3, const Permutation& permutation) {
+  // Get the internal data structures from the permutation.
+  auto& file = permutation._file;
+  const auto& meta = permutation._meta;
+  const auto& reader = permutation._reader;
+
+  // Find the index of the first block where the last triple is not smaller.
+  //
+  // NOTE: Since `col2LastId_` has been added to `CompressedBlockMetadata`, this
+  // can be computed without having to decompress any blocks.
+  const vector<CompressedBlockMetadata>& blocks = meta.blockData();
+  auto matchingBlock = std::lower_bound(
+      blocks.begin(), blocks.end(), std::array<Id, 3>{id1, id2, id3},
+      [&](const CompressedBlockMetadata& block, const auto& triple) -> bool {
+        if (block.col0LastId_ < triple[0]) {
+          return true;
+        } else if (block.col0LastId_ == triple[0]) {
+          if (block.col1LastId_ < triple[1]) {
+            return true;
+          } else if (block.col1LastId_ == triple[1]) {
+            return block.col2LastId_ < triple[2];
+          }
+        }
+        return false;
+      });
+  size_t blockIndex = matchingBlock - blocks.begin();
+
+  // Preliminary `LocatedTriple` object with the correct `blockIndex` and
+  // `Id`s, and a special `rowIndexInBlock` (see below) and `existsInIndex` set
+  // to `false`.
+  LocatedTriple locatedTriple{blockIndex, NO_ROW_INDEX, id1, id2, id3, false};
+
+  // If all `Id`s from all blocks are smaller, we return the index of the last
+  // block plus one (typical "end" semantics) and the special row index
+  // `NO_ROW_INDEX` (see how this is considered in `mergeTriples`).
+  if (matchingBlock == blocks.end()) {
+    AD_CORRECTNESS_CHECK(blockIndex == blocks.size());
+    return locatedTriple;
+  }
+
+  // Read and decompress the block.
+  DecompressedBlock blockTuples =
+      reader.readAndDecompressBlock(*matchingBlock, file, std::nullopt);
+
+  // Find the smallest relation `Id` that is not smaller than `id1` and get its
+  // metadata and the position of the first and last triple with that `Id` in
+  // the block.
+  //
+  // IMPORTANT: If relation `id1` exists in the index, but our triple is larger
+  // than all triples of that relation in the index and the last triple of that
+  // relation ends a block, then our block search above (correctly) landed us at
+  // the next block. We can detect this by checking whether the first relation
+  // `Id` of the block is larger than `id1` and then we should get the metadata
+  // for that `Id` and not for `id1` (which would pertain to a previous block).
+  //
+  // TODO: There is still a bug in `MetaDataWrapperHashMap::lower_bound`,
+  // which is relevant in the rare case where a triple is inserted with an
+  // `Id` for predicate that is not a new `Id`, but has not been used for a
+  // predicate in the original index.
+  //
+  // NOTE: Since we have already handled the case, where all `Id`s in the
+  // permutation are smaller, above, such a relation should exist.
+  Id searchId =
+      matchingBlock->col0FirstId_ > id1 ? matchingBlock->col0FirstId_ : id1;
+  const auto& it = meta._data.lower_bound(searchId);
+  AD_CORRECTNESS_CHECK(it != meta._data.end());
+  Id id = it.getId();
+  const auto& relationMetadata = meta.getMetaData(id);
+  size_t offsetBegin = relationMetadata.offsetInBlock_;
+  size_t offsetEnd = offsetBegin + relationMetadata.numRows_;
+  // Note: A relation that spans multiple blocks has `offsetInBlock_ == max()`.
+  // The block we found above then contains only triples from that relation.
+  if (offsetBegin == std::numeric_limits<uint64_t>::max()) {
+    offsetBegin = 0;
+    offsetEnd = blockTuples.size();
+  }
+  AD_CORRECTNESS_CHECK(offsetBegin <= blockTuples.size());
+  AD_CORRECTNESS_CHECK(offsetEnd <= blockTuples.size());
+
+  // If we have found `id1`, we can do a binary search in the portion of the
+  // block that pertains to it (note the special case mentioned above, where
+  // we are already at the beginning of the next block).
+  //
+  // Otherwise, `id` is the next larger `Id` and the position of the first
+  // triple of that relation is exactly the position we are looking for.
+  if (id == id1) {
+    locatedTriple.rowIndexInBlock =
+        std::lower_bound(blockTuples.begin() + offsetBegin,
+                         blockTuples.begin() + offsetEnd,
+                         std::array<Id, 2>{id2, id3},
+                         [](const auto& a, const auto& b) {
+                           return a[0] < b[0] || (a[0] == b[0] && a[1] < b[1]);
+                         }) -
+        blockTuples.begin();
+    // Check if the triple at the found position is equal to `id1 id2 id3`.
+    // Note that our default for `existsInIndex` was set to `false` above.
+    const size_t& i = locatedTriple.rowIndexInBlock;
+    AD_CORRECTNESS_CHECK(i < blockTuples.size());
+    if (i < offsetEnd && blockTuples(i, 0) == id2 && blockTuples(i, 1) == id3) {
+      locatedTriple.existsInIndex = true;
+    }
+  } else {
+    AD_CORRECTNESS_CHECK(id1 < id);
+    locatedTriple.rowIndexInBlock = offsetBegin;
+  }
+
+  // Return the result.
+  return locatedTriple;
+}
+
+// ____________________________________________________________________________
+template <LocatedTriplesPerBlock::MatchMode matchMode>
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriplesImpl(
+    size_t blockIndex, Id id1, Id id2) const {
+  // A block without located triples has no entry in `map_`; both counts are
+  // zero then.
+  if (!map_.contains(blockIndex)) {
+    return {0, 0};
+  }
+
+  // Walk over the located triples of the block. Triples that do not match
+  // `id1` / `id2` (as demanded by `matchMode`) are skipped, all others are
+  // counted as "existing" (to be deleted) or "new" (to be inserted).
+  size_t numNew = 0;
+  size_t numExisting = 0;
+  for (const LocatedTriple& lt : map_.at(blockIndex)) {
+    bool isRelevant = true;
+    if constexpr (matchMode == MatchMode::MatchId1) {
+      isRelevant = lt.id1 == id1;
+    } else if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+      isRelevant = lt.id1 == id1 && lt.id2 == id2;
+    }
+    if (!isRelevant) {
+      continue;
+    }
+    if (lt.existsInIndex) {
+      ++numExisting;
+    } else {
+      ++numNew;
+    }
+  }
+
+  // NOTE: The new triples come first in the returned pair.
+  return {numNew, numExisting};
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(
+    size_t blockIndex) const {
+  return numTriplesImpl<MatchMode::MatchAll>(blockIndex);
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(size_t blockIndex,
+                                                             Id id1) const {
+  return numTriplesImpl<MatchMode::MatchId1>(blockIndex, id1);
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(size_t blockIndex,
+                                                             Id id1,
+                                                             Id id2) const {
+  return numTriplesImpl<MatchMode::MatchId1AndId2>(blockIndex, id1, id2);
+}
+
+// ____________________________________________________________________________
+template <LocatedTriplesPerBlock::MatchMode matchMode>
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            Id id2, size_t rowIndexInBlockBegin,
+                                            size_t rowIndexInBlockEnd) const {
+  // This method should only be called if there are located triples in the
+  // specified block.
+  AD_CONTRACT_CHECK(map_.contains(blockIndex));
+
+  // The special case `block == std::nullopt` (write only located triples to
+  // `result`) is only allowed, when `id1` or `id1` and `id2` are specified.
+  AD_CONTRACT_CHECK(block.has_value() || matchMode != MatchMode::MatchAll);
+
+  // If `rowIndexInBlockEnd` has the default value (see `LocatedTriples.h`), the
+  // intended semantics is that we read the whole block (note that we can't have
+  // a default value that depends on the values of previous arguments).
+  if (rowIndexInBlockEnd == LocatedTriple::NO_ROW_INDEX && block.has_value()) {
+    rowIndexInBlockEnd = block.value().size();
+  }
+
+  // Check that `rowIndexInBlockBegin` and `rowIndexInBlockEnd` define a valid
+  // and non-empty range and that it is a subrange of `block` (unless the latter
+  // is `std::nullopt`).
+  if (block.has_value()) {
+    AD_CONTRACT_CHECK(rowIndexInBlockBegin < block.value().size());
+    AD_CONTRACT_CHECK(rowIndexInBlockEnd <= block.value().size());
+  }
+  AD_CONTRACT_CHECK(rowIndexInBlockBegin < rowIndexInBlockEnd);
+
+  // If we restrict `id1` and `id2`, the index block and the result must have
+  // one column (for the `id3`). Otherwise, they must have two columns (for the
+  // `id2` and the `id3`).
+  if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+    AD_CONTRACT_CHECK(!block.has_value() || block.value().numColumns() == 1);
+    AD_CONTRACT_CHECK(result.numColumns() == 1);
+  } else {
+    AD_CONTRACT_CHECK(!block.has_value() || block.value().numColumns() == 2);
+    AD_CONTRACT_CHECK(result.numColumns() == 2);
+  }
+
+  auto resultEntry = result.begin() + offsetInResult;
+  const auto& locatedTriples = map_.at(blockIndex);
+  auto locatedTriple = locatedTriples.begin();
+
+  // Helper lambda that checks whether the given located triple should be
+  // considered, given the `matchMode`.
+  auto locatedTripleMatches = [&]() {
+    if constexpr (matchMode == MatchMode::MatchAll) {
+      return true;
+    } else if constexpr (matchMode == MatchMode::MatchId1) {
+      return locatedTriple->id1 == id1;
+    } else if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+      return locatedTriple->id1 == id1 && locatedTriple->id2 == id2;
+    }
+  };
+
+  // Advance to the first located triple in the specified range.
+  while (locatedTriple != locatedTriples.end() &&
+         locatedTriple->rowIndexInBlock < rowIndexInBlockBegin) {
+    ++locatedTriple;
+  }
+
+  // Iterate over all located triples in the specified range. In the special
+  // case `block == std::nullopt` (only write located triples to `result`), all
+  // relevant located triples have `rowIndexInBlock == NO_ROW_INDEX` (here we
+  // need that `NO_ROW_INDEX` is the maximal `size_t` value minus one).
+  if (!block.has_value()) {
+    rowIndexInBlockBegin = LocatedTriple::NO_ROW_INDEX;
+    rowIndexInBlockEnd = rowIndexInBlockBegin + 1;
+    AD_CORRECTNESS_CHECK(rowIndexInBlockBegin < rowIndexInBlockEnd);
+  }
+  for (size_t rowIndex = rowIndexInBlockBegin; rowIndex < rowIndexInBlockEnd;
+       ++rowIndex) {
+    // Append triples that are marked for insertion at this `rowIndex` to the
+    // result.
+    while (locatedTriple != locatedTriples.end() &&
+           locatedTriple->rowIndexInBlock == rowIndex &&
+           locatedTriple->existsInIndex == false) {
+      if (locatedTripleMatches()) {
+        if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+          (*resultEntry)[0] = locatedTriple->id3;
+        } else {
+          (*resultEntry)[0] = locatedTriple->id2;
+          (*resultEntry)[1] = locatedTriple->id3;
+        }
+        ++resultEntry;
+      }
+      ++locatedTriple;
+    }
+
+    // If `block` is given, append its row at this position to the result,
+    // unless a matching located triple marks it for deletion.
+    bool deleteThisEntry = false;
+    if (locatedTriple != locatedTriples.end() &&
+        locatedTriple->rowIndexInBlock == rowIndex &&
+        locatedTriple->existsInIndex == true) {
+      deleteThisEntry = locatedTripleMatches();
+      ++locatedTriple;
+    }
+    if (block.has_value() && !deleteThisEntry) {
+      *resultEntry++ = block.value()[rowIndex];
+    }
+  };
+
+  // Return the number of rows written to `result`.
+  return resultEntry - (result.begin() + offsetInResult);
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult) const {
+  return mergeTriples<MatchMode::MatchAll>(blockIndex, std::move(block), result,
+                                           offsetInResult);
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            size_t rowIndexInBlockBegin) const {
+  return mergeTriples<MatchMode::MatchId1>(
+      blockIndex, std::move(block), result, offsetInResult, id1,
+      Id::makeUndefined(), rowIndexInBlockBegin);
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            Id id2, size_t rowIndexInBlockBegin,
+                                            size_t rowIndexInBlockEnd) const {
+  return mergeTriples<MatchMode::MatchId1AndId2>(
+      blockIndex, std::move(block), result, offsetInResult, id1, id2,
+      rowIndexInBlockBegin, rowIndexInBlockEnd);
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt) {
+  // Print the special row index symbolically, all other row indexes as numbers.
+  const bool hasRow = lt.rowIndexInBlock != LocatedTriple::NO_ROW_INDEX;
+  const std::string row =
+      hasRow ? std::to_string(lt.rowIndexInBlock) : "NO_ROW_INDEX";
+  os << "LT(" << lt.blockIndex << " " << row << " " << lt.id1 << " " << lt.id2
+     << " " << lt.id3 << " " << lt.existsInIndex << ")";
+  return os;
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts) {
+  // Write to `os` (not to `std::cout`), so that any output stream works.
+  os << "{";
+  std::copy(lts.begin(), lts.end(),
+            std::ostream_iterator<LocatedTriple>(os, " "));
+  return os << "}";
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb) {
+  for (const auto& [blockIndex, lts] : ltpb.map_) {  // Do not copy the sets.
+    os << "Block #" << blockIndex << ": " << lts << std::endl;
+  }
+  return os;
+}
diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h
new file mode 100644
index 0000000000..bb967bfe95
--- /dev/null
+++ b/src/index/LocatedTriples.h
@@ -0,0 +1,196 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#pragma once
+
+#include "engine/idTable/IdTable.h"
+#include "global/IdTriple.h"
+#include "util/HashMap.h"
+
+class Permutation;
+
+// A triple and its location in a particular permutation.
+//
+// If a triple is not contained in the permutation, the location is the location
+// of the next larger triple (which may be in the next block or beyond the last
+// block). For a detailed definition of all border cases, see the definition at
+// the end of this file.
+//
+// NOTE: Technically, `blockIndex` and the `existsInIndex` are redundant in this
+// record because they can be derived when the class is used. However, they are
+// useful for testing, and for a small number of delta triples (think millions),
+// space efficiency is not a significant issue for this class.
+struct LocatedTriple {
+  // The index of the block and the location within that block, according to the
+  // definition above.
+  size_t blockIndex;
+  size_t rowIndexInBlock;
+  // The `Id`s of the triple in the order of the permutation. For example,
+  // for an object pertaining to the SPO permutation: `id1` is the subject,
+  // `id2` is the predicate, and `id3` is the object.
+  Id id1;
+  Id id2;
+  Id id3;
+  // Flag that is true if and only if the triple exists in the permutation. It
+  // is then equal to the triple at the position given by `blockIndex` and
+  // `rowIndexInBlock`.
+  bool existsInIndex;
+
+  // Locate the given triple in the given permutation.
+  static LocatedTriple locateTripleInPermutation(
+      Id id1, Id id2, Id id3, const Permutation& permutation);
+
+  // Special row index for triples that belong to the previous block (see the
+  // definition for the location of a triple at the end of this file). It is
+  // `constexpr` (implicitly inline), so it can be odr-used without a separate
+  // definition. NOTE: We need `NO_ROW_INDEX + 1 > NO_ROW_INDEX`, hence it is
+  // defined as `max() - 1` and not as the seemingly more natural `max()`.
+  static constexpr size_t NO_ROW_INDEX = std::numeric_limits<size_t>::max() - 1;
+};
+
+// A sorted set of located triples. In `LocatedTriplesPerBlock` below, we use
+// this to store all located triples with the same `blockIndex`. The order is
+// by `id1`, `id2`, `id3` only; the location fields and `existsInIndex` are not
+// compared. NOTE: We could also specialize `std::less` here, but the explicit
+// specification of the order makes it clearer.
+struct LocatedTripleCompare {
+  bool operator()(const LocatedTriple& x, const LocatedTriple& y) const {
+    return IdTriple{x.id1, x.id2, x.id3} < IdTriple{y.id1, y.id2, y.id3};
+  }
+};
+using LocatedTriples = std::set<LocatedTriple, LocatedTripleCompare>;
+
+// Sorted sets of located triples, grouped by block. We use this to store all
+// located triples for a permutation.
+class LocatedTriplesPerBlock {
+ private:
+  // The total number of `LocatedTriple` objects stored (for all blocks).
+  size_t numTriples_ = 0;
+
+ public:
+  // For each block with a non-empty set of located triples, the located triples
+  // in that block.
+  //
+  // NOTE: This is currently not private because we want access to
+  // `map_.size()`, `map_.clear()`, `map_.contains(...)`, and `map_.at(...)`.
+  // We could also make `LocatedTriplesPerBlock` a subclass of `HashMap<size_t,
+  // LocatedTriples>`, but not sure whether that is good style.
+  ad_utility::HashMap<size_t, LocatedTriples> map_;
+
+ public:
+  // Get the number of located triples for the given block that match `id1` (if
+  // provided) and `id2` (if provided). The return value is a pair of numbers:
+  // first, the number of new triples ("to be inserted") and second, the
+  // number of existing triples ("to be deleted").
+  std::pair<size_t, size_t> numTriples(size_t blockIndex) const;
+  std::pair<size_t, size_t> numTriples(size_t blockIndex, Id id1) const;
+  std::pair<size_t, size_t> numTriples(size_t blockIndex, Id id1, Id id2) const;
+
+  // Merge located triples for `blockIndex` with the given index `block` and
+  // write to `result`, starting from position `offsetInResult`. Consider only
+  // located triples in the range specified by `rowIndexInBlockBegin` and
+  // `rowIndexInBlockEnd`. Consider only triples that match `id1` (if provided)
+  // and `id2` (if provided). Return the number of rows written to `result`.
+  //
+  // PRECONDITIONS:
+  //
+  // 1. The set of located triples for `blockIndex` must be non-empty.
+  // Otherwise, there is no need for merging and this method shouldn't be
+  // called for efficiency reasons.
+  //
+  // 2. It is the responsibility of the caller that there is enough space for
+  // the result of the merge in `result` starting from `offsetInResult`.
+  //
+  // 3. If `block == std::nullopt`, we are adding to `result` the located
+  // triples for block `blockIndex` where the `rowIndexInBlock` is
+  // `NO_ROW_INDEX`. These actually belong to the previous block, but were
+  // larger than all triples there. This requires that `id1` or both `id1` and
+  // `id2` are specified.
+  //
+  size_t mergeTriples(size_t blockIndex, std::optional<IdTable> block,
+                      IdTable& result, size_t offsetInResult) const;
+  size_t mergeTriples(size_t blockIndex, std::optional<IdTable> block,
+                      IdTable& result, size_t offsetInResult, Id id1,
+                      size_t rowIndexInBlockBegin = 0) const;
+  size_t mergeTriples(
+      size_t blockIndex, std::optional<IdTable> block, IdTable& result,
+      size_t offsetInResult, Id id1, Id id2, size_t rowIndexInBlockBegin = 0,
+      size_t rowIndexInBlockEnd = LocatedTriple::NO_ROW_INDEX) const;
+
+  // Add the given `locatedTriple` to the given `LocatedTriplesPerBlock`.
+  // Return a handle to where it was added (`LocatedTriples` is a sorted set,
+  // see above). We need this handle so that we can easily remove the
+  // `locatedTriple` again from the set in case we need to.
+  //
+  // The `locatedTriple` must not already exist in `LocatedTriplesPerBlock`.
+  LocatedTriples::iterator add(const LocatedTriple& locatedTriple) {
+    LocatedTriples& locatedTriples = map_[locatedTriple.blockIndex];
+    auto [handle, wasInserted] = locatedTriples.emplace(locatedTriple);
+    AD_CORRECTNESS_CHECK(wasInserted == true);
+    AD_CORRECTNESS_CHECK(handle != locatedTriples.end());
+    ++numTriples_;
+    return handle;
+  };
+
+  // Get the total number of `LocatedTriple` objects (for all blocks).
+  size_t numTriples() const { return numTriples_; }
+
+  // Get the number of blocks with a non-empty set of located triples.
+  size_t numBlocks() const { return map_.size(); }
+
+  // Remove all located triples.
+  void clear() {
+    map_.clear();
+    numTriples_ = 0;
+  }
+
+ private:
+  // Match modes for `numTriplesImpl` and the templated `mergeTriples` below.
+  enum struct MatchMode { MatchAll, MatchId1, MatchId1AndId2 };
+
+  // The implementation behind the public per-block `numTriples` methods above.
+  template <MatchMode matchMode>
+  std::pair<size_t, size_t> numTriplesImpl(size_t blockIndex,
+                                           Id id1 = Id::makeUndefined(),
+                                           Id id2 = Id::makeUndefined()) const;
+
+  // The implementation behind the public `mergeTriples` methods above.
+  // The only reason that the arguments `id1` and `id2` come at the end here is
+  // so that we can give them default values.
+  template <MatchMode matchMode>
+  size_t mergeTriples(
+      size_t blockIndex, std::optional<IdTable> block, IdTable& result,
+      size_t offsetInResult, Id id1 = Id::makeUndefined(),
+      Id id2 = Id::makeUndefined(), size_t rowIndexInBlockBegin = 0,
+      size_t rowIndexInBlockEnd = LocatedTriple::NO_ROW_INDEX) const;
+};
+
+// Human-readable representation of `LocatedTriple`, `LocatedTriples`, and
+// `LocatedTriplesPerBlock`, which are very useful for debugging.
+std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt);
+std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts);
+std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb);
+
+// DEFINITION OF THE POSITION OF A LOCATED TRIPLE IN A PERMUTATION
+//
+// 1. The position is defined by the index of a block in the permutation and the
+// index of a row within that block.
+//
+// 2. If the triple is contained in the permutation, it is contained exactly
+// once and so there is a well defined block and position in that block.
+//
+// 3. If there is a block, where the first triple is smaller and the last triple
+// is larger, then that is the block and the position in that block is that of
+// the first triple that is (not smaller and hence) larger.
+//
+// 4. If the triple falls "between two blocks" (the last triple of the previous
+// block is smaller and the first triple of the next block is larger), then the
+// position is the first position in that next block.
+//
+// 5. As a special case of 4, if the triple is smaller than all triples in the
+// permutation, the position is the first position of the first block.
+//
+// 6. If the triple is larger than all triples in the permutation, the block
+// index is one after the largest block index and the row index within that
+// non-existing block is the special value `LocatedTriple::NO_ROW_INDEX`.
diff --git a/src/index/MetaDataHandler.h b/src/index/MetaDataHandler.h
index da84f1158a..e24e33fe5c 100644
--- a/src/index/MetaDataHandler.h
+++ b/src/index/MetaDataHandler.h
@@ -1,29 +1,39 @@
-// Copyright 2018, University of Freiburg,
+// Copyright 2018 - 2023, University of Freiburg
 // Chair of Algorithms and Data Structures
-// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com)
-//
+// Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
+
 #pragma once
 
 #include <cassert>
 #include <stxxl/vector>
 
-#include "../global/Id.h"
-#include "../util/Exception.h"
-#include "../util/HashMap.h"
-#include "../util/Iterators.h"
-#include "../util/Log.h"
-#include "../util/Serializer/Serializer.h"
-#include "./CompressedRelation.h"
-
-// _____________________________________________________________________
+#include "global/Id.h"
+#include "index/CompressedRelation.h"
+#include "util/Exception.h"
+#include "util/HashMap.h"
+#include "util/Iterators.h"
+#include "util/Log.h"
+#include "util/Serializer/Serializer.h"
+
+// Class for access to relation metadata stored in a vector. Specifically, our
+// index uses this with `M = MmapVector<CompressedRelationMetadata>`; see
+// `index/IndexMetaData.h`.
 template <class M>
 class MetaDataWrapperDense {
+ private:
+  // A vector of metadata objects.
+  M _vec;
+
  public:
+  // An iterator with an additional method `getId()` that gives the relation ID
+  // of the current metadata object.
   template <typename BaseIterator>
   struct AddGetIdIterator : BaseIterator {
     using BaseIterator::BaseIterator;
     AddGetIdIterator(BaseIterator base) : BaseIterator{base} {}
     [[nodiscard]] Id getId() const { return getIdFromElement(*(*this)); }
+    [[nodiscard]] const auto& getMetaData() const { return *(*this); }
     static Id getIdFromElement(const typename BaseIterator::value_type& v) {
       return v.col0Id_;
     }
@@ -39,6 +49,7 @@ class MetaDataWrapperDense {
   // The underlying array is sorted, so all iterators are ordered iterators
   using ConstOrderedIterator = ConstIterator;
 
+  // The type of the stored metadata objects.
   using value_type = typename M::value_type;
 
   // _________________________________________________________
@@ -109,12 +120,21 @@ class MetaDataWrapperDense {
   // ___________________________________________________________
   std::string getFilename() const { return _vec.getFilename(); }
 
- private:
+  // Return an iterator to the first metadata object whose `col0Id_` is not
+  // less than `id` (or the end iterator if there is none). This used to be
+  // private (because it was only used as a subroutine in the above), but we
+  // now need it in `DeltaTriples::findTripleResult`.
   ConstIterator lower_bound(Id id) const {
     auto cmp = [](const auto& metaData, Id id) {
       return metaData.col0Id_ < id;
     };
     return std::lower_bound(_vec.begin(), _vec.end(), id, cmp);
   }
-  M _vec;
+  // Same as above, but for a non-const object (returns an `Iterator`).
+  Iterator lower_bound(Id id) {
+    auto cmp = [](const auto& metaData, Id id) {
+      return metaData.col0Id_ < id;
+    };
+    return std::lower_bound(_vec.begin(), _vec.end(), id, cmp);
+  }
 };
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f94b54c063..c81ecaa9c5 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -104,6 +104,8 @@ addLinkAndDiscoverTest(IndexMetaDataTest index)
 # TODO<qup42, joka921> fix this
 addLinkAndDiscoverTestSerial(IndexTest index)
 
+addLinkAndDiscoverTestSerial(LocatedTriplesTest index)
+
 addLinkAndDiscoverTest(FTSAlgorithmsTest index)
 
 addLinkAndDiscoverTest(EngineTest engine)
diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp
new file mode 100644
index 0000000000..ce4d0b909e
--- /dev/null
+++ b/test/LocatedTriplesTest.cpp
@@ -0,0 +1,173 @@
+//  Copyright 2023, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#include <gtest/gtest.h>
+
+#include "./util/IdTableHelpers.h"
+#include "./util/IdTestHelpers.h"
+#include "index/CompressedRelation.h"
+#include "index/IndexMetaData.h"
+#include "index/LocatedTriples.h"
+#include "index/Permutations.h"
+
+// Anonymous namespace (as in `test/IndexMetaDataTest.cpp`): `V` is file-local.
+namespace {
+auto V = ad_utility::testing::VocabId;
+}
+
+// Fixture with helper functions.
+class LocatedTriplesTest : public ::testing::Test {
+ protected:
+  // Make `LocatedTriplesPerBlock` by calling `add` for each of the given
+  // `LocatedTriple` objects (the order in which they are given does not matter).
+  LocatedTriplesPerBlock makeLocatedTriplesPerBlock(
+      std::vector<LocatedTriple> locatedTriples) {
+    LocatedTriplesPerBlock result;
+    for (const auto& locatedTriple : locatedTriples) {
+      result.add(locatedTriple);
+    }
+    return result;
+  }
+};
+
+// Test the method that counts the number of `LocatedTriple`s in a block.
+TEST_F(LocatedTriplesTest, numTriplesInBlock) {
+  // Set up lists of located triples for three blocks.
+  auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+      {LocatedTriple{1, 0, V(10), V(1), V(0), true},
+       LocatedTriple{1, 0, V(10), V(2), V(1), true},
+       LocatedTriple{1, 0, V(11), V(3), V(0), false},
+       LocatedTriple{2, 0, V(20), V(4), V(0), false},
+       LocatedTriple{2, 0, V(21), V(5), V(0), false},
+       LocatedTriple{3, 0, V(30), V(6), V(0), false},
+       LocatedTriple{3, 0, V(32), V(7), V(0), true}});
+  ASSERT_EQ(locatedTriplesPerBlock.numBlocks(), 3);
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(), 7);
+
+  auto P = [](size_t n1, size_t n2) -> std::pair<size_t, size_t> {
+    return {n1, n2};
+  };
+
+  // Check the total counts per block.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1), P(1, 2));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2), P(2, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3), P(1, 1));
+
+  // Check the counts per block for a given `id1`.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10)), P(0, 2));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(11)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(20)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(21)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(30)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(32)), P(0, 1));
+
+  // Check the counts per block for a given `id1` and `id2`.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10), V(1)), P(0, 1));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10), V(2)), P(0, 1));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(11), V(3)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(20), V(4)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(21), V(5)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(30), V(6)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(32), V(7)), P(0, 1));
+}
+
+// Test the method that merges the matching `LocatedTriple`s from a block into a
+// part of an `IdTable`.
+TEST_F(LocatedTriplesTest, mergeTriples) {
+  // A block, as it could come from an index scan.
+  IdTable block = makeIdTableFromVector({{10, 10},    // Row 0
+                                         {15, 20},    // Row 1
+                                         {15, 30},    // Row 2
+                                         {20, 10},    // Row 3
+                                         {30, 20},    // Row 4
+                                         {30, 30}});  // Row 5
+
+  // A set of located triples for that block.
+  auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+      {LocatedTriple{1, 0, V(1), V(10), V(10), true},    // Delete row 0
+       LocatedTriple{1, 1, V(1), V(10), V(11), false},   // Insert before row 1
+       LocatedTriple{1, 1, V(2), V(11), V(10), false},   // Insert before row 1
+       LocatedTriple{1, 4, V(2), V(21), V(11), false},   // Insert before row 4
+       LocatedTriple{1, 4, V(2), V(30), V(10), false},   // Insert before row 4
+       LocatedTriple{1, 4, V(2), V(30), V(20), true},    // Delete row 4
+       LocatedTriple{1, 5, V(3), V(30), V(30), true}});  // Delete row 5
+
+  // Merge all these triples into `block` and check that the result is as
+  // expected (four triples inserted and three triples deleted).
+  {
+    IdTable resultExpected = makeIdTableFromVector({{10, 11},    // Row 0
+                                                    {11, 10},    // Row 1
+                                                    {15, 20},    // Row 2
+                                                    {15, 30},    // Row 3
+                                                    {20, 10},    // Row 4
+                                                    {21, 11},    // Row 5
+                                                    {30, 10}});  // Row 6
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge only the triples with `id1 == V(2)` into `block` (three triples
+  // inserted and one triple deleted).
+  {
+    IdTable resultExpected = makeIdTableFromVector({{10, 10},    // Row 0
+                                                    {11, 10},    // Row 1
+                                                    {15, 20},    // Row 2
+                                                    {15, 30},    // Row 3
+                                                    {20, 10},    // Row 4
+                                                    {21, 11},    // Row 5
+                                                    {30, 10},    // Row 6
+                                                    {30, 30}});  // Row 7
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0, V(2));
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Repeat but with a partial block that leaves out the first two elements of
+  // `block`.
+  {
+    IdTable resultExpected = makeIdTableFromVector({{15, 30},    // Row 0
+                                                    {20, 10},    // Row 1
+                                                    {21, 11},    // Row 2
+                                                    {30, 10},    // Row 3
+                                                    {30, 30}});  // Row 4
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0, V(2), 2);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge only the triples with `id1 == V(2)` and `id2 == V(30)` into the
+  // corresponding partial block (one triple inserted, one triple deleted).
+  {
+    IdTable blockColumnId3(1, ad_utility::testing::makeAllocator());
+    blockColumnId3.resize(block.size());
+    for (size_t i = 0; i < block.size(); ++i) {
+      blockColumnId3(i, 0) = block(i, 1);
+    }
+    IdTable resultExpected = makeIdTableFromVector({{10}, {30}});
+    IdTable result(1, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, std::move(blockColumnId3), result, 0,
+                                        V(2), V(30), 4, 6);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge special triples. TODO: `result` is not compared to `resultExpected`.
+  {
+    size_t NRI = LocatedTriple::NO_ROW_INDEX;
+    auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+        {LocatedTriple{2, NRI, V(1), V(30), V(40), true},
+         LocatedTriple{2, NRI, V(1), V(30), V(50), true},
+         LocatedTriple{2, NRI, V(1), V(40), V(10), true}});
+    IdTable resultExpected = makeIdTableFromVector({{30, 40},    // Row 0
+                                                    {30, 50},    // Row 1
+                                                    {40, 10}});  // Row 2
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(2, std::nullopt, result, 0, V(1));
+  }
+}