From e6183a625f5285b300e8230b03b1eeef7610eaf9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 02:15:13 +0200 Subject: [PATCH 01/10] Implement lazy bind operation --- src/engine/Bind.cpp | 126 +++++++++++++++++++++-------------- src/engine/Bind.h | 14 ++-- src/engine/idTable/IdTable.h | 6 ++ 3 files changed, 89 insertions(+), 57 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 83aef56e2e..40e127c69e 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -81,64 +81,92 @@ std::vector Bind::getChildren() { } // _____________________________________________________________________________ -ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) { - using std::endl; - LOG(DEBUG) << "Get input to BIND operation..." << endl; - std::shared_ptr subRes = _subtree->getResult(); - LOG(DEBUG) << "Got input to Bind operation." << endl; - IdTable idTable{getExecutionContext()->getAllocator()}; - - idTable.setNumColumns(getResultWidth()); - - // Make a deep copy of the local vocab from `subRes` and then add to it (in - // case BIND adds a new word or words). - // - // TODO: In most BIND operations, nothing is added to the local vocabulary, so - // it would be more efficient to first share the pointer here (like with - // `shareLocalVocabFrom`) and only copy it when a new word is about to be - // added. Same for GROUP BY. - auto localVocab = subRes->getCopyOfLocalVocab(); - - size_t inwidth = subRes->idTable().numColumns(); - size_t outwidth = getResultWidth(); - - CALL_FIXED_SIZE((std::array{inwidth, outwidth}), &Bind::computeExpressionBind, - this, &idTable, &localVocab, *subRes, - _bind._expression.getPimpl()); - - LOG(DEBUG) << "BIND result computation done." << endl; - return {std::move(idTable), resultSortedOn(), std::move(localVocab)}; +ProtoResult Bind::computeResult(bool requestLaziness) { + LOG(DEBUG) << "Get input to BIND operation..." << std::endl; + std::shared_ptr subRes = _subtree->getResult(requestLaziness); + LOG(DEBUG) << "Got input to Bind operation." << std::endl; + + auto applyBind = [this, subRes](auto&& idTable, LocalVocab* localVocab) { + size_t inwidth = idTable.numColumns(); + size_t outwidth = getResultWidth(); + return ad_utility::callFixedSize( + (std::array{inwidth, outwidth}), + [this, &subRes, localVocab, + &idTable]() { + return computeExpressionBind( + localVocab, AD_FWD(idTable), subRes->localVocab(), + _bind._expression.getPimpl()); + }); + }; + + if (subRes->isFullyMaterialized()) { + // Make a deep copy of the local vocab from `subRes` and then add to it (in + // case BIND adds a new word or words). + // + // TODO: In most BIND operations, nothing is added to the local vocabulary, + // so it would be more efficient to first share the pointer here (like with + // `shareLocalVocabFrom`) and only copy it when a new word is about to be + // added. Same for GROUP BY. + LocalVocab localVocab = subRes->getCopyOfLocalVocab(); + IdTable result = applyBind(subRes->idTable(), &localVocab); + LOG(DEBUG) << "BIND result computation done." << std::endl; + return {std::move(result), resultSortedOn(), std::move(localVocab)}; + } + auto localVocab = std::make_shared(); + auto generator = + [](std::shared_ptr vocab, auto applyBind, + std::shared_ptr result) -> cppcoro::generator { + for (IdTable& idTable : result->idTables()) { + co_yield applyBind(idTable, vocab.get()); + } + std::array vocabs{vocab.get(), &result->localVocab()}; + *vocab = LocalVocab::merge(std::span{vocabs}); + }(localVocab, std::move(applyBind), std::move(subRes)); + return {std::move(generator), resultSortedOn(), std::move(localVocab)}; } // _____________________________________________________________________________ template -void Bind::computeExpressionBind( - IdTable* outputIdTable, LocalVocab* outputLocalVocab, - const Result& inputResultTable, +IdTable Bind::computeExpressionBind( + LocalVocab* outputLocalVocab, + ad_utility::SimilarTo auto&& inputIdTable, + const LocalVocab& inputLocalVocab, sparqlExpression::SparqlExpression* expression) const { sparqlExpression::EvaluationContext evaluationContext( - *getExecutionContext(), _subtree->getVariableColumns(), - inputResultTable.idTable(), getExecutionContext()->getAllocator(), - inputResultTable.localVocab(), cancellationHandle_, deadline_); + *getExecutionContext(), _subtree->getVariableColumns(), inputIdTable, + getExecutionContext()->getAllocator(), inputLocalVocab, + cancellationHandle_, deadline_); sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); - - const auto input = inputResultTable.idTable().asStaticView(); - auto output = std::move(*outputIdTable).toStatic(); - - // first initialize the first columns (they remain identical) - const auto inSize = input.size(); - output.reserve(inSize); - const auto inCols = input.numColumns(); - // copy the input to the first numColumns; - for (size_t i = 0; i < inSize; ++i) { - output.emplace_back(); - for (size_t j = 0; j < inCols; ++j) { - output(i, j) = input(i, j); + size_t inSize = inputIdTable.size(); + size_t inCols = inputIdTable.numColumns(); + + auto output = [this, &inputIdTable, inSize, inCols]() { + if constexpr (std::is_const_v< + std::remove_reference_t>) { + const auto input = inputIdTable.template asStaticView(); + auto output = + IdTable{getResultWidth(), getExecutionContext()->getAllocator()} + .template toStatic(); + + // first initialize the first columns (they remain identical) + output.reserve(inSize); + // copy the input to the first numColumns; + for (size_t i = 0; i < inSize; ++i) { + output.emplace_back(); + for (size_t j = 0; j < inCols; ++j) { + output(i, j) = input(i, j); + } + checkCancellation(); + } + return output; + } else { + IdTable output = std::move(inputIdTable); + output.addEmptyColumn(); + return std::move(output).toStatic(); } - checkCancellation(); - } + }(); auto visitor = [&]( T&& singleResult) mutable { @@ -190,5 +218,5 @@ void Bind::computeExpressionBind( std::visit(visitor, std::move(expressionResult)); - *outputIdTable = std::move(output).toDynamic(); + return std::move(output).toDynamic(); } diff --git a/src/engine/Bind.h b/src/engine/Bind.h index bb1996a967..30bf0d4d58 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -2,8 +2,7 @@ // Created by johannes on 19.04.20. // -#ifndef QLEVER_BIND_H -#define QLEVER_BIND_H +#pragma once #include "engine/Operation.h" #include "engine/sparqlExpressions/SparqlExpressionPimpl.h" @@ -46,16 +45,15 @@ class Bind : public Operation { [[nodiscard]] vector resultSortedOn() const override; private: - ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult(bool requestLaziness) override; // Implementation for the binding of arbitrary expressions. template - void computeExpressionBind( - IdTable* outputIdTable, LocalVocab* outputLocalVocab, - const Result& inputResultTable, + IdTable computeExpressionBind( + LocalVocab* outputLocalVocab, + ad_utility::SimilarTo auto&& inputIdTable, + const LocalVocab& inputLocalVocab, sparqlExpression::SparqlExpression* expression) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; - -#endif // QLEVER_BIND_H diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index 1f1f502503..29a0257cac 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -278,6 +278,12 @@ class IdTable { data().resize(numColumns, ColumnStorage{allocator_}); } + // Add a new empty column to the table. + void addEmptyColumn() requires columnsAreAllocatable && isDynamic { + data().emplace_back(size(), allocator_); + ++numColumns_; + } + // The number of rows in the table. We deliberately have an explicitly named // function `numRows` as well as a generic `size` function because the latter // can be used to write generic code, for example when using STL algorithms on From 22a1f0e8f33da983aa1601d7e137c5e0266dea39 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 02:55:33 +0200 Subject: [PATCH 02/10] Add Unit Tests for new functionality --- test/IdTableTest.cpp | 14 ++++++++++++ test/engine/BindTest.cpp | 46 ++++++++++++++++++++++++++++++++++++++ test/engine/CMakeLists.txt | 1 + 3 files changed, 61 insertions(+) create mode 100644 test/engine/BindTest.cpp diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 46fb9a0f6c..360686497b 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1119,6 +1119,20 @@ TEST(IdTable, constructorsAreSfinaeFriendly) { static_assert(std::is_constructible_v); } +// _____________________________________________________________________________ +TEST(IdTable, addEmptyColumn) { + using ::testing::ElementsAre; + IdTable table{1, ad_utility::makeUnlimitedAllocator()}; + table.push_back({V(1)}); + table.push_back({V(2)}); + + table.addEmptyColumn(); + + EXPECT_EQ(table.numColumns(), 2); + EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2))); + EXPECT_THAT(table.getColumn(1), ElementsAre(UndefId(), UndefId())); +} + // Check that we can completely instantiate `IdTable`s with a different value // type and a different underlying storage. diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp new file mode 100644 index 0000000000..8f5d54f2ea --- /dev/null +++ b/test/engine/BindTest.cpp @@ -0,0 +1,46 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#include + +#include "../util/IdTableHelpers.h" +#include "../util/IndexTestHelpers.h" +#include "./ValuesForTesting.h" +#include "engine/Bind.h" +#include "engine/sparqlExpressions/LiteralExpression.h" + +using namespace sparqlExpression; + +// _____________________________________________________________________________ +TEST(Bind, computeResult) { + using Vars = std::vector>; + auto* qec = ad_utility::testing::getQec(); + auto valuesTree = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1}, {2}, {3}, {4}}), Vars{Variable{"?a"}}); + Bind bind{ + qec, + std::move(valuesTree), + {SparqlExpressionPimpl{ + std::make_unique(Variable{"?a"}), "?a as ?b"}, + Variable{"?b"}}}; + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); + ASSERT_TRUE(result->isFullyMaterialized()); + EXPECT_EQ(result->idTable(), + makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); + } + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_FALSE(result->isFullyMaterialized()); + auto& idTables = result->idTables(); + auto begin = idTables.begin(); + ASSERT_NE(begin, idTables.end()); + EXPECT_EQ(*begin, makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); + EXPECT_EQ(++begin, idTables.end()); + } +} diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt index 8f283ffb88..a57b9636e1 100644 --- a/test/engine/CMakeLists.txt +++ b/test/engine/CMakeLists.txt @@ -7,3 +7,4 @@ addLinkAndDiscoverTest(SpatialJoinTest engine) addLinkAndDiscoverTest(DistinctTest engine) addLinkAndDiscoverTest(GroupByHashMapOptimizationTest) addLinkAndDiscoverTest(LazyGroupByTest engine) +addLinkAndDiscoverTest(BindTest engine) From ca78a55222b3ba24a4d3a5657afddbd4002477ec Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:22:04 +0200 Subject: [PATCH 03/10] Fix unused captures warning --- src/engine/Bind.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 40e127c69e..975befd338 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -162,6 +162,9 @@ IdTable Bind::computeExpressionBind( } return output; } else { + (void)this; + (void)inSize; + (void)inCols; IdTable output = std::move(inputIdTable); output.addEmptyColumn(); return std::move(output).toStatic(); From d2213cd46d026a4d1806cf59fdb8baf0165e9520 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:52:22 +0200 Subject: [PATCH 04/10] Replace undefined comparison --- test/IdTableTest.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 360686497b..d8110ed436 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1122,6 +1122,7 @@ TEST(IdTable, constructorsAreSfinaeFriendly) { // _____________________________________________________________________________ TEST(IdTable, addEmptyColumn) { using ::testing::ElementsAre; + using ::testing::Eq; IdTable table{1, ad_utility::makeUnlimitedAllocator()}; table.push_back({V(1)}); table.push_back({V(2)}); @@ -1130,7 +1131,10 @@ TEST(IdTable, addEmptyColumn) { EXPECT_EQ(table.numColumns(), 2); EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2))); - EXPECT_THAT(table.getColumn(1), ElementsAre(UndefId(), UndefId())); + EXPECT_THAT( + table.getColumn(1), + ElementsAre(AD_PROPERTY(Id, getDatatype, Eq(Datatype::Undefined)), + AD_PROPERTY(Id, getDatatype, Eq(Datatype::Undefined)))); } // Check that we can completely instantiate `IdTable`s with a different value From 8d22e7f8b5245f00372d0c7c75d6b5e04a8efb50 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 05:03:57 +0200 Subject: [PATCH 05/10] Extend test cases and omit uninitialized values --- test/IdTableTest.cpp | 7 ++- test/engine/BindTest.cpp | 92 +++++++++++++++++++++++++++++++++++----- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index d8110ed436..34b1ad3072 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1131,10 +1131,9 @@ TEST(IdTable, addEmptyColumn) { EXPECT_EQ(table.numColumns(), 2); EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2))); - EXPECT_THAT( - table.getColumn(1), - ElementsAre(AD_PROPERTY(Id, getDatatype, Eq(Datatype::Undefined)), - AD_PROPERTY(Id, getDatatype, Eq(Datatype::Undefined)))); + // The new column is uninitialized, so we can't make any more specific + // assertions about its content here. + EXPECT_EQ(table.getColumn(1).size(), 2); } // Check that we can completely instantiate `IdTable`s with a different value diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index 8f5d54f2ea..bb69d70958 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -11,19 +11,41 @@ #include "engine/sparqlExpressions/LiteralExpression.h" using namespace sparqlExpression; +using Vars = std::vector>; -// _____________________________________________________________________________ -TEST(Bind, computeResult) { - using Vars = std::vector>; - auto* qec = ad_utility::testing::getQec(); +namespace { +Bind makeBindForIdTable(QueryExecutionContext* qec, IdTable idTable) { auto valuesTree = ad_utility::makeExecutionTree( - qec, makeIdTableFromVector({{1}, {2}, {3}, {4}}), Vars{Variable{"?a"}}); - Bind bind{ + qec, std::move(idTable), Vars{Variable{"?a"}}); + return { qec, std::move(valuesTree), {SparqlExpressionPimpl{ std::make_unique(Variable{"?a"}), "?a as ?b"}, Variable{"?b"}}}; +} + +IdTable getSingleIdTable(cppcoro::generator& generator) { + std::optional result = std::nullopt; + for (IdTable& idTable : generator) { + if (result.has_value()) { + ADD_FAILURE() << "More than one IdTable was generated"; + break; + } + result = std::move(idTable); + } + if (!result.has_value()) { + throw std::runtime_error{"No IdTable was generated"}; + } + return std::move(result).value(); +} +} // namespace + +// _____________________________________________________________________________ +TEST(Bind, computeResult) { + auto* qec = ad_utility::testing::getQec(); + Bind bind = + makeBindForIdTable(qec, makeIdTableFromVector({{1}, {2}, {3}, {4}})); { qec->getQueryTreeCache().clearAll(); @@ -37,10 +59,58 @@ TEST(Bind, computeResult) { qec->getQueryTreeCache().clearAll(); auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); ASSERT_FALSE(result->isFullyMaterialized()); - auto& idTables = result->idTables(); - auto begin = idTables.begin(); - ASSERT_NE(begin, idTables.end()); - EXPECT_EQ(*begin, makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); - EXPECT_EQ(++begin, idTables.end()); + EXPECT_EQ(getSingleIdTable(result->idTables()), + makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); + } +} + +// _____________________________________________________________________________ +TEST(Bind, computeResultWithTableWithoutRows) { + auto* qec = ad_utility::testing::getQec(); + Bind bind = makeBindForIdTable( + qec, IdTable{1, ad_utility::makeUnlimitedAllocator()}); + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); + ASSERT_TRUE(result->isFullyMaterialized()); + EXPECT_EQ(result->idTable(), + (IdTable{2, ad_utility::makeUnlimitedAllocator()})); + } + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_FALSE(result->isFullyMaterialized()); + EXPECT_EQ(getSingleIdTable(result->idTables()), + (IdTable{2, ad_utility::makeUnlimitedAllocator()})); + } +} + +// _____________________________________________________________________________ +TEST(Bind, computeResultWithTableWithoutColumns) { + auto val = Id::makeFromInt(42); + auto* qec = ad_utility::testing::getQec(); + auto valuesTree = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{}, {}}), Vars{}); + Bind bind{ + qec, + std::move(valuesTree), + {SparqlExpressionPimpl{std::make_unique(val), "42 as ?b"}, + Variable{"?b"}}}; + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); + ASSERT_TRUE(result->isFullyMaterialized()); + EXPECT_EQ(result->idTable(), makeIdTableFromVector({{val}, {val}})); + } + + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_FALSE(result->isFullyMaterialized()); + EXPECT_EQ(getSingleIdTable(result->idTables()), + makeIdTableFromVector({{val}, {val}})); } } From c7d88774bff261c31bf33ba14981155c760dc449 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:17:26 +0200 Subject: [PATCH 06/10] Simplify code by dropping static id tables --- src/engine/Bind.cpp | 70 ++++++++++-------------------------- src/engine/Bind.h | 6 ---- src/engine/idTable/IdTable.h | 28 +++++++++++---- test/IdTableTest.cpp | 18 ++++++++++ 4 files changed, 57 insertions(+), 65 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 975befd338..2dbb8b352e 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -87,16 +87,9 @@ ProtoResult Bind::computeResult(bool requestLaziness) { LOG(DEBUG) << "Got input to Bind operation." << std::endl; auto applyBind = [this, subRes](auto&& idTable, LocalVocab* localVocab) { - size_t inwidth = idTable.numColumns(); - size_t outwidth = getResultWidth(); - return ad_utility::callFixedSize( - (std::array{inwidth, outwidth}), - [this, &subRes, localVocab, - &idTable]() { - return computeExpressionBind( - localVocab, AD_FWD(idTable), subRes->localVocab(), - _bind._expression.getPimpl()); - }); + return computeExpressionBind(localVocab, AD_FWD(idTable), + subRes->localVocab(), + _bind._expression.getPimpl()); }; if (subRes->isFullyMaterialized()) { @@ -126,7 +119,6 @@ ProtoResult Bind::computeResult(bool requestLaziness) { } // _____________________________________________________________________________ -template IdTable Bind::computeExpressionBind( LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, @@ -139,37 +131,10 @@ IdTable Bind::computeExpressionBind( sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); - size_t inSize = inputIdTable.size(); - size_t inCols = inputIdTable.numColumns(); - - auto output = [this, &inputIdTable, inSize, inCols]() { - if constexpr (std::is_const_v< - std::remove_reference_t>) { - const auto input = inputIdTable.template asStaticView(); - auto output = - IdTable{getResultWidth(), getExecutionContext()->getAllocator()} - .template toStatic(); - - // first initialize the first columns (they remain identical) - output.reserve(inSize); - // copy the input to the first numColumns; - for (size_t i = 0; i < inSize; ++i) { - output.emplace_back(); - for (size_t j = 0; j < inCols; ++j) { - output(i, j) = input(i, j); - } - checkCancellation(); - } - return output; - } else { - (void)this; - (void)inSize; - (void)inCols; - IdTable output = std::move(inputIdTable); - output.addEmptyColumn(); - return std::move(output).toStatic(); - } - }(); + + auto output = std::move(AD_FWD(inputIdTable)).cloneOrMove(); + output.addEmptyColumn(); + auto outputColumn = output.getColumn(output.numColumns() - 1); auto visitor = [&]( T&& singleResult) mutable { @@ -177,22 +142,23 @@ IdTable Bind::computeExpressionBind( constexpr static bool isStrongId = std::is_same_v; if constexpr (isVariable) { - auto column = + auto columnIndex = getInternallyVisibleVariableColumns().at(singleResult).columnIndex_; - for (size_t i = 0; i < inSize; ++i) { - output(i, inCols) = output(i, column); + auto inputColumn = output.getColumn(columnIndex); + for (size_t i = 0; i < outputColumn.size(); ++i) { + outputColumn[i] = inputColumn[i]; checkCancellation(); } } else if constexpr (isStrongId) { - for (size_t i = 0; i < inSize; ++i) { - output(i, inCols) = singleResult; + for (size_t i = 0; i < outputColumn.size(); ++i) { + outputColumn[i] = singleResult; checkCancellation(); } } else { constexpr bool isConstant = sparqlExpression::isConstantResult; auto resultGenerator = sparqlExpression::detail::makeGenerator( - std::forward(singleResult), inSize, &evaluationContext); + std::forward(singleResult), outputColumn.size(), &evaluationContext); if constexpr (isConstant) { auto it = resultGenerator.begin(); @@ -200,8 +166,8 @@ IdTable Bind::computeExpressionBind( Id constantId = sparqlExpression::detail::constantExpressionResultToId( std::move(*it), *outputLocalVocab); - for (size_t i = 0; i < inSize; ++i) { - output(i, inCols) = constantId; + for (size_t i = 0; i < outputColumn.size(); ++i) { + outputColumn[i] = constantId; checkCancellation(); } } @@ -209,7 +175,7 @@ IdTable Bind::computeExpressionBind( size_t i = 0; // We deliberately move the values from the generator. for (auto& resultValue : resultGenerator) { - output(i, inCols) = + outputColumn[i] = sparqlExpression::detail::constantExpressionResultToId( std::move(resultValue), *outputLocalVocab); i++; @@ -221,5 +187,5 @@ IdTable Bind::computeExpressionBind( std::visit(visitor, std::move(expressionResult)); - return std::move(output).toDynamic(); + return output; } diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 30bf0d4d58..22211763ee 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -36,11 +36,6 @@ class Bind : public Operation { float getMultiplicity(size_t col) override; bool knownEmptyResult() override; - // Returns the variable to which the expression will be bound - [[nodiscard]] const string& targetVariable() const { - return _bind._target.name(); - } - protected: [[nodiscard]] vector resultSortedOn() const override; @@ -48,7 +43,6 @@ class Bind : public Operation { ProtoResult computeResult(bool requestLaziness) override; // Implementation for the binding of arbitrary expressions. - template IdTable computeExpressionBind( LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index 29a0257cac..264c33c9c1 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -435,13 +435,27 @@ class IdTable { push_back(std::ranges::ref_view{newRow}); } - // Create a deep copy of this `IdTable` that owns its memory. In most cases - // this behaves exactly like the copy constructor with the following - // exception: If `this` is a view (because the `isView` template parameter is - // `true`), then the copy constructor will also create a (const and - // non-owning) view, but `clone` will create a mutable deep copy of the data - // that the view points to - IdTable clone() const + // Same semantics as clone, but if we have a temporary, we can move instead. + IdTable cloneOrMove() const& requires( + !isView && std::is_copy_constructible_v && + std::is_copy_constructible_v) { + return clone(); + } + + // Optimization for rvalues: We can move the data instead of copying it. + IdTable cloneOrMove() && + requires(!isView && std::is_copy_constructible_v && + std::is_copy_constructible_v) { + return std::move(*this); + } + + // Create a deep copy of this `IdTable` that owns its memory. In most + // cases this behaves exactly like the copy constructor with the following + // exception: If `this` is a view (because the `isView` template parameter + // is `true`), then the copy constructor will also create a (const and + // non-owning) view, but `clone` will create a mutable deep copy of the + // data that the view points to + IdTable clone() const requires std::is_copy_constructible_v && std::is_copy_constructible_v { Storage storage; diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 34b1ad3072..4abcf6e34f 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1136,6 +1136,24 @@ TEST(IdTable, addEmptyColumn) { EXPECT_EQ(table.getColumn(1).size(), 2); } +// _____________________________________________________________________________ +TEST(IdTable, cloneOrMove) { + using ::testing::ElementsAre; + using ::testing::Eq; + IdTable source{1, ad_utility::makeUnlimitedAllocator()}; + source.push_back({V(1)}); + source.push_back({V(2)}); + + IdTable clone = static_cast(source).cloneOrMove(); + + EXPECT_EQ(source, clone); + + IdTable moved = std::move(source).cloneOrMove(); + + EXPECT_EQ(clone, moved); + EXPECT_EQ(source.numRows(), 0); +} + // Check that we can completely instantiate `IdTable`s with a different value // type and a different underlying storage. From d1c3bd18a3f0b8487f5b3e5518c403fb9cc72708 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 11 Oct 2024 02:39:25 +0200 Subject: [PATCH 07/10] Reduce code duplication --- src/engine/Bind.cpp | 5 +-- test/engine/BindTest.cpp | 78 +++++++++++++--------------------------- 2 files changed, 27 insertions(+), 56 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 2dbb8b352e..da9bac1597 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -132,7 +132,7 @@ IdTable Bind::computeExpressionBind( sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); - auto output = std::move(AD_FWD(inputIdTable)).cloneOrMove(); + auto output = std::move(inputIdTable).cloneOrMove(); output.addEmptyColumn(); auto outputColumn = output.getColumn(output.numColumns() - 1); @@ -158,7 +158,8 @@ IdTable Bind::computeExpressionBind( constexpr bool isConstant = sparqlExpression::isConstantResult; auto resultGenerator = sparqlExpression::detail::makeGenerator( - std::forward(singleResult), outputColumn.size(), &evaluationContext); + std::forward(singleResult), outputColumn.size(), + &evaluationContext); if constexpr (isConstant) { auto it = resultGenerator.begin(); diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index bb69d70958..56c9c313c2 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -25,44 +25,40 @@ Bind makeBindForIdTable(QueryExecutionContext* qec, IdTable idTable) { Variable{"?b"}}}; } -IdTable getSingleIdTable(cppcoro::generator& generator) { - std::optional result = std::nullopt; - for (IdTable& idTable : generator) { - if (result.has_value()) { - ADD_FAILURE() << "More than one IdTable was generated"; - break; - } - result = std::move(idTable); - } - if (!result.has_value()) { - throw std::runtime_error{"No IdTable was generated"}; - } - return std::move(result).value(); -} -} // namespace - -// _____________________________________________________________________________ -TEST(Bind, computeResult) { - auto* qec = ad_utility::testing::getQec(); - Bind bind = - makeBindForIdTable(qec, makeIdTableFromVector({{1}, {2}, {3}, {4}})); +void expectBindYieldsIdTable( + QueryExecutionContext* qec, Bind& bind, const IdTable& expected, + ad_utility::source_location loc = ad_utility::source_location::current()) { + auto trace = generateLocationTrace(loc); { qec->getQueryTreeCache().clearAll(); auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); ASSERT_TRUE(result->isFullyMaterialized()); - EXPECT_EQ(result->idTable(), - makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); + EXPECT_EQ(result->idTable(), expected); } { qec->getQueryTreeCache().clearAll(); auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); ASSERT_FALSE(result->isFullyMaterialized()); - EXPECT_EQ(getSingleIdTable(result->idTables()), - makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); + auto& idTables = result->idTables(); + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(*iterator, expected); + EXPECT_EQ(++iterator, idTables.end()); } } +} // namespace + +// _____________________________________________________________________________ +TEST(Bind, computeResult) { + auto* qec = ad_utility::testing::getQec(); + Bind bind = + makeBindForIdTable(qec, makeIdTableFromVector({{1}, {2}, {3}, {4}})); + + expectBindYieldsIdTable( + qec, bind, makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}})); +} // _____________________________________________________________________________ TEST(Bind, computeResultWithTableWithoutRows) { @@ -70,21 +66,8 @@ TEST(Bind, computeResultWithTableWithoutRows) { Bind bind = makeBindForIdTable( qec, IdTable{1, ad_utility::makeUnlimitedAllocator()}); - { - qec->getQueryTreeCache().clearAll(); - auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); - ASSERT_TRUE(result->isFullyMaterialized()); - EXPECT_EQ(result->idTable(), - (IdTable{2, ad_utility::makeUnlimitedAllocator()})); - } - - { - qec->getQueryTreeCache().clearAll(); - auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); - ASSERT_FALSE(result->isFullyMaterialized()); - EXPECT_EQ(getSingleIdTable(result->idTables()), - (IdTable{2, ad_utility::makeUnlimitedAllocator()})); - } + expectBindYieldsIdTable(qec, bind, + IdTable{2, ad_utility::makeUnlimitedAllocator()}); } // _____________________________________________________________________________ @@ -99,18 +82,5 @@ TEST(Bind, computeResultWithTableWithoutColumns) { {SparqlExpressionPimpl{std::make_unique(val), "42 as ?b"}, Variable{"?b"}}}; - { - qec->getQueryTreeCache().clearAll(); - auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); - ASSERT_TRUE(result->isFullyMaterialized()); - EXPECT_EQ(result->idTable(), makeIdTableFromVector({{val}, {val}})); - } - - { - qec->getQueryTreeCache().clearAll(); - auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); - ASSERT_FALSE(result->isFullyMaterialized()); - EXPECT_EQ(getSingleIdTable(result->idTables()), - makeIdTableFromVector({{val}, {val}})); - } + expectBindYieldsIdTable(qec, bind, makeIdTableFromVector({{val}, {val}})); } From cefc8cd523d847abfe54ef81713ff55481fa74e1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 11 Oct 2024 19:53:59 +0200 Subject: [PATCH 08/10] Add lazy output option for non-lazy input --- src/engine/Bind.cpp | 44 +++++++++++++++++++++++++++++++++---- src/engine/Bind.h | 8 ++++++- test/engine/BindTest.cpp | 47 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index da9bac1597..e29d54b390 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -80,19 +80,46 @@ std::vector Bind::getChildren() { return {_subtree.get()}; } +// _____________________________________________________________________________ +IdTable Bind::cloneSubView(const IdTable& idTable, + const std::pair& subrange) { + IdTable result(idTable.numColumns(), idTable.getAllocator()); + result.resize(subrange.second - subrange.first); + std::ranges::copy(idTable.begin() + subrange.first, + idTable.begin() + subrange.second, result.begin()); + return result; +} + // _____________________________________________________________________________ ProtoResult Bind::computeResult(bool requestLaziness) { LOG(DEBUG) << "Get input to BIND operation..." << std::endl; std::shared_ptr subRes = _subtree->getResult(requestLaziness); LOG(DEBUG) << "Got input to Bind operation." << std::endl; - auto applyBind = [this, subRes](auto&& idTable, LocalVocab* localVocab) { + auto applyBind = [this, subRes]( + auto&& idTable, LocalVocab* localVocab, + std::optional> subrange = + std::nullopt) { return computeExpressionBind(localVocab, AD_FWD(idTable), subRes->localVocab(), - _bind._expression.getPimpl()); + _bind._expression.getPimpl(), subrange); }; if (subRes->isFullyMaterialized()) { + if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) { + auto localVocab = + std::make_shared(subRes->getCopyOfLocalVocab()); + auto generator = [](std::shared_ptr vocab, auto applyBind, + std::shared_ptr result) + -> cppcoro::generator { + size_t size = result->idTable().size(); + for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { + co_yield applyBind(result->idTable(), vocab.get(), + {{offset, std::min(size, offset + CHUNK_SIZE)}}); + } + }(localVocab, std::move(applyBind), std::move(subRes)); + return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + } // Make a deep copy of the local vocab from `subRes` and then add to it (in // case BIND adds a new word or words). // @@ -123,16 +150,25 @@ IdTable Bind::computeExpressionBind( LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, const LocalVocab& inputLocalVocab, - sparqlExpression::SparqlExpression* expression) const { + sparqlExpression::SparqlExpression* expression, + std::optional> subrange) const { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), inputIdTable, getExecutionContext()->getAllocator(), inputLocalVocab, cancellationHandle_, deadline_); + if (subrange.has_value()) { + auto [beginIndex, endIndex] = subrange.value(); + evaluationContext._beginIndex = beginIndex; + evaluationContext._endIndex = endIndex; + } + sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); - auto output = std::move(inputIdTable).cloneOrMove(); + auto output = subrange.has_value() + ? cloneSubView(inputIdTable, subrange.value()) + : std::move(inputIdTable).cloneOrMove(); output.addEmptyColumn(); auto outputColumn = output.getColumn(output.numColumns() - 1); diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 22211763ee..c4ac3ae8fe 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -11,6 +11,8 @@ /// BIND operation, currently only supports a very limited subset of expressions class Bind : public Operation { public: + static constexpr size_t CHUNK_SIZE = 10'000; + Bind(QueryExecutionContext* qec, std::shared_ptr subtree, parsedQuery::Bind b) : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {} @@ -42,12 +44,16 @@ class Bind : public Operation { private: ProtoResult computeResult(bool requestLaziness) override; + static IdTable cloneSubView(const IdTable& idTable, + const std::pair& subrange); + // Implementation for the binding of arbitrary expressions. IdTable computeExpressionBind( LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, const LocalVocab& inputLocalVocab, - sparqlExpression::SparqlExpression* expression) const; + sparqlExpression::SparqlExpression* expression, + std::optional> subrange) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index 56c9c313c2..d0f4309f56 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -84,3 +84,50 @@ TEST(Bind, computeResultWithTableWithoutColumns) { expectBindYieldsIdTable(qec, bind, makeIdTableFromVector({{val}, {val}})); } + +// _____________________________________________________________________________ +TEST( + Bind, + computeResultProducesLazyResultWhenFullyMaterializedSubResultIsTooLargeAndRequested) { + auto val = Id::makeFromInt(42); + IdTable::row_type row{1}; + row[0] = val; + auto* qec = ad_utility::testing::getQec(); + IdTable table{1, ad_utility::makeUnlimitedAllocator()}; + table.resize(Bind::CHUNK_SIZE + 1); + std::ranges::fill(table, row); + auto valuesTree = ad_utility::makeExecutionTree( + qec, table.clone(), Vars{Variable{"?a"}}, false, + std::vector{}, LocalVocab{}, std::nullopt, true); + Bind bind{ + qec, + std::move(valuesTree), + {SparqlExpressionPimpl{std::make_unique(val), "42 as ?b"}, + Variable{"?b"}}}; + + table.addEmptyColumn(); + row = IdTable::row_type{2}; + row[0] = val; + row[1] = val; + std::ranges::fill(table, row); + { + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); + ASSERT_TRUE(result->isFullyMaterialized()); + EXPECT_EQ(result->idTable(), table); + } + + { + table.resize(Bind::CHUNK_SIZE); + qec->getQueryTreeCache().clearAll(); + auto result = bind.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_FALSE(result->isFullyMaterialized()); + auto& idTables = result->idTables(); + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(*iterator, table); + ASSERT_NE(++iterator, idTables.end()); + EXPECT_EQ(*iterator, makeIdTableFromVector({{val, val}})); + EXPECT_EQ(++iterator, idTables.end()); + } +} From ff97b22c791579c505695a08e842f02866411918 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 14 Oct 2024 21:21:47 +0200 Subject: [PATCH 09/10] Address PR comments --- src/engine/Bind.cpp | 31 ++++++++++++------------------- src/engine/Bind.h | 8 ++++---- test/IdTableTest.cpp | 2 +- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index e29d54b390..86a280c497 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -123,10 +123,10 @@ ProtoResult Bind::computeResult(bool requestLaziness) { // Make a deep copy of the local vocab from `subRes` and then add to it (in // case BIND adds a new word or words). // - // TODO: In most BIND operations, nothing is added to the local vocabulary, - // so it would be more efficient to first share the pointer here (like with - // `shareLocalVocabFrom`) and only copy it when a new word is about to be - // added. Same for GROUP BY. + // Make a copy of the local vocab from`subRes`and then add to it (in case + // BIND adds new words). Note: The copy of the local vocab is shallow + // via`shared_ptr`s, so the following is also efficient if the BIND adds no + // new words. LocalVocab localVocab = subRes->getCopyOfLocalVocab(); IdTable result = applyBind(subRes->idTable(), &localVocab); LOG(DEBUG) << "BIND result computation done." << std::endl; @@ -137,7 +137,7 @@ ProtoResult Bind::computeResult(bool requestLaziness) { [](std::shared_ptr vocab, auto applyBind, std::shared_ptr result) -> cppcoro::generator { for (IdTable& idTable : result->idTables()) { - co_yield applyBind(idTable, vocab.get()); + co_yield applyBind(std::move(idTable), vocab.get()); } std::array vocabs{vocab.get(), &result->localVocab()}; *vocab = LocalVocab::merge(std::span{vocabs}); @@ -150,7 +150,7 @@ IdTable Bind::computeExpressionBind( LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, const LocalVocab& inputLocalVocab, - sparqlExpression::SparqlExpression* expression, + const sparqlExpression::SparqlExpression* expression, std::optional> subrange) const { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), inputIdTable, @@ -168,7 +168,7 @@ IdTable Bind::computeExpressionBind( auto output = subrange.has_value() ? cloneSubView(inputIdTable, subrange.value()) - : std::move(inputIdTable).cloneOrMove(); + : AD_FWD(inputIdTable).cloneOrMove(); output.addEmptyColumn(); auto outputColumn = output.getColumn(output.numColumns() - 1); @@ -181,15 +181,10 @@ IdTable Bind::computeExpressionBind( auto columnIndex = getInternallyVisibleVariableColumns().at(singleResult).columnIndex_; auto inputColumn = output.getColumn(columnIndex); - for (size_t i = 0; i < outputColumn.size(); ++i) { - outputColumn[i] = inputColumn[i]; - checkCancellation(); - } + AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size()); + std::ranges::copy(inputColumn, outputColumn.begin()); } else if constexpr (isStrongId) { - for (size_t i = 0; i < outputColumn.size(); ++i) { - outputColumn[i] = singleResult; - checkCancellation(); - } + std::ranges::fill(outputColumn, singleResult); } else { constexpr bool isConstant = sparqlExpression::isConstantResult; @@ -203,10 +198,8 @@ IdTable Bind::computeExpressionBind( Id constantId = sparqlExpression::detail::constantExpressionResultToId( std::move(*it), *outputLocalVocab); - for (size_t i = 0; i < outputColumn.size(); ++i) { - outputColumn[i] = constantId; - checkCancellation(); - } + checkCancellation(); + std::ranges::fill(outputColumn, constantId); } } else { size_t i = 0; diff --git a/src/engine/Bind.h b/src/engine/Bind.h index c4ac3ae8fe..786c5e5d64 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -1,6 +1,6 @@ -// -// Created by johannes on 19.04.20. -// +// Copyright 2020, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach #pragma once @@ -52,7 +52,7 @@ class Bind : public Operation { LocalVocab* outputLocalVocab, ad_utility::SimilarTo auto&& inputIdTable, const LocalVocab& inputLocalVocab, - sparqlExpression::SparqlExpression* expression, + const sparqlExpression::SparqlExpression* expression, std::optional> subrange) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 4abcf6e34f..e2a83c07e5 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1144,7 +1144,7 @@ TEST(IdTable, cloneOrMove) { source.push_back({V(1)}); source.push_back({V(2)}); - IdTable clone = static_cast(source).cloneOrMove(); + IdTable clone = std::as_const(source).cloneOrMove(); EXPECT_EQ(source, clone); From 5938519f8fe817c09ec85f71bd26d6c7ba37659c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:35:44 +0200 Subject: [PATCH 10/10] Simplify `IdTable` move and clone semantics --- src/engine/Bind.cpp | 42 +++++++++++++----------------------- src/engine/Bind.h | 6 ++---- src/engine/idTable/IdTable.h | 28 ++++++------------------ test/IdTableTest.cpp | 18 ---------------- 4 files changed, 24 insertions(+), 70 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 86a280c497..dde7e019d9 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -96,13 +96,10 @@ ProtoResult Bind::computeResult(bool requestLaziness) { std::shared_ptr subRes = _subtree->getResult(requestLaziness); LOG(DEBUG) << "Got input to Bind operation." << std::endl; - auto applyBind = [this, subRes]( - auto&& idTable, LocalVocab* localVocab, - std::optional> subrange = - std::nullopt) { - return computeExpressionBind(localVocab, AD_FWD(idTable), + auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) { + return computeExpressionBind(localVocab, std::move(idTable), subRes->localVocab(), - _bind._expression.getPimpl(), subrange); + _bind._expression.getPimpl()); }; if (subRes->isFullyMaterialized()) { @@ -114,8 +111,10 @@ ProtoResult Bind::computeResult(bool requestLaziness) { -> cppcoro::generator { size_t size = result->idTable().size(); for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { - co_yield applyBind(result->idTable(), vocab.get(), - {{offset, std::min(size, offset + CHUNK_SIZE)}}); + co_yield applyBind( + cloneSubView(result->idTable(), + {offset, std::min(size, offset + CHUNK_SIZE)}), + vocab.get()); } }(localVocab, std::move(applyBind), std::move(subRes)); return {std::move(generator), resultSortedOn(), std::move(localVocab)}; @@ -128,7 +127,7 @@ ProtoResult Bind::computeResult(bool requestLaziness) { // via`shared_ptr`s, so the following is also efficient if the BIND adds no // new words. LocalVocab localVocab = subRes->getCopyOfLocalVocab(); - IdTable result = applyBind(subRes->idTable(), &localVocab); + IdTable result = applyBind(subRes->idTable().clone(), &localVocab); LOG(DEBUG) << "BIND result computation done." << std::endl; return {std::move(result), resultSortedOn(), std::move(localVocab)}; } @@ -147,30 +146,19 @@ ProtoResult Bind::computeResult(bool requestLaziness) { // _____________________________________________________________________________ IdTable Bind::computeExpressionBind( - LocalVocab* outputLocalVocab, - ad_utility::SimilarTo auto&& inputIdTable, + LocalVocab* outputLocalVocab, IdTable idTable, const LocalVocab& inputLocalVocab, - const sparqlExpression::SparqlExpression* expression, - std::optional> subrange) const { + const sparqlExpression::SparqlExpression* expression) const { sparqlExpression::EvaluationContext evaluationContext( - *getExecutionContext(), _subtree->getVariableColumns(), inputIdTable, + *getExecutionContext(), _subtree->getVariableColumns(), idTable, getExecutionContext()->getAllocator(), inputLocalVocab, cancellationHandle_, deadline_); - if (subrange.has_value()) { - auto [beginIndex, endIndex] = subrange.value(); - evaluationContext._beginIndex = beginIndex; - evaluationContext._endIndex = endIndex; - } - sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); - auto output = subrange.has_value() - ? cloneSubView(inputIdTable, subrange.value()) - : AD_FWD(inputIdTable).cloneOrMove(); - output.addEmptyColumn(); - auto outputColumn = output.getColumn(output.numColumns() - 1); + idTable.addEmptyColumn(); + auto outputColumn = idTable.getColumn(idTable.numColumns() - 1); auto visitor = [&]( T&& singleResult) mutable { @@ -180,7 +168,7 @@ IdTable Bind::computeExpressionBind( if constexpr (isVariable) { auto columnIndex = getInternallyVisibleVariableColumns().at(singleResult).columnIndex_; - auto inputColumn = output.getColumn(columnIndex); + auto inputColumn = idTable.getColumn(columnIndex); AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size()); std::ranges::copy(inputColumn, outputColumn.begin()); } else if constexpr (isStrongId) { @@ -217,5 +205,5 @@ IdTable Bind::computeExpressionBind( std::visit(visitor, std::move(expressionResult)); - return output; + return idTable; } diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 786c5e5d64..eeaafaf3ed 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -49,11 +49,9 @@ class Bind : public Operation { // Implementation for the binding of arbitrary expressions. IdTable computeExpressionBind( - LocalVocab* outputLocalVocab, - ad_utility::SimilarTo auto&& inputIdTable, + LocalVocab* outputLocalVocab, IdTable idTable, const LocalVocab& inputLocalVocab, - const sparqlExpression::SparqlExpression* expression, - std::optional> subrange) const; + const sparqlExpression::SparqlExpression* expression) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index 264c33c9c1..29a0257cac 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -435,27 +435,13 @@ class IdTable { push_back(std::ranges::ref_view{newRow}); } - // Same semantics as clone, but if we have a temporary, we can move instead. - IdTable cloneOrMove() const& requires( - !isView && std::is_copy_constructible_v && - std::is_copy_constructible_v) { - return clone(); - } - - // Optimization for rvalues: We can move the data instead of copying it. - IdTable cloneOrMove() && - requires(!isView && std::is_copy_constructible_v && - std::is_copy_constructible_v) { - return std::move(*this); - } - - // Create a deep copy of this `IdTable` that owns its memory. In most - // cases this behaves exactly like the copy constructor with the following - // exception: If `this` is a view (because the `isView` template parameter - // is `true`), then the copy constructor will also create a (const and - // non-owning) view, but `clone` will create a mutable deep copy of the - // data that the view points to - IdTable clone() const + // Create a deep copy of this `IdTable` that owns its memory. In most cases + // this behaves exactly like the copy constructor with the following + // exception: If `this` is a view (because the `isView` template parameter is + // `true`), then the copy constructor will also create a (const and + // non-owning) view, but `clone` will create a mutable deep copy of the data + // that the view points to + IdTable clone() const requires std::is_copy_constructible_v && std::is_copy_constructible_v { Storage storage; diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index e2a83c07e5..34b1ad3072 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -1136,24 +1136,6 @@ TEST(IdTable, addEmptyColumn) { EXPECT_EQ(table.getColumn(1).size(), 2); } -// _____________________________________________________________________________ -TEST(IdTable, cloneOrMove) { - using ::testing::ElementsAre; - using ::testing::Eq; - IdTable source{1, ad_utility::makeUnlimitedAllocator()}; - source.push_back({V(1)}); - source.push_back({V(2)}); - - IdTable clone = std::as_const(source).cloneOrMove(); - - EXPECT_EQ(source, clone); - - IdTable moved = std::move(source).cloneOrMove(); - - EXPECT_EQ(clone, moved); - EXPECT_EQ(source.numRows(), 0); -} - // Check that we can completely instantiate `IdTable`s with a different value // type and a different underlying storage.