Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement lazy BIND #1543

Merged
merged 11 commits into from
Oct 15, 2024
129 changes: 80 additions & 49 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,64 +81,95 @@ std::vector<QueryExecutionTree*> Bind::getChildren() {
}

// _____________________________________________________________________________
ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) {
using std::endl;
LOG(DEBUG) << "Get input to BIND operation..." << endl;
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Got input to Bind operation." << endl;
IdTable idTable{getExecutionContext()->getAllocator()};

idTable.setNumColumns(getResultWidth());

// Make a deep copy of the local vocab from `subRes` and then add to it (in
// case BIND adds a new word or words).
//
// TODO: In most BIND operations, nothing is added to the local vocabulary, so
// it would be more efficient to first share the pointer here (like with
// `shareLocalVocabFrom`) and only copy it when a new word is about to be
// added. Same for GROUP BY.
auto localVocab = subRes->getCopyOfLocalVocab();

size_t inwidth = subRes->idTable().numColumns();
size_t outwidth = getResultWidth();

CALL_FIXED_SIZE((std::array{inwidth, outwidth}), &Bind::computeExpressionBind,
this, &idTable, &localVocab, *subRes,
_bind._expression.getPimpl());

LOG(DEBUG) << "BIND result computation done." << endl;
return {std::move(idTable), resultSortedOn(), std::move(localVocab)};
// Compute the result of this BIND operation.
//
// The child result is requested with the same `requestLaziness` flag that was
// passed in. Depending on what the child actually delivers, one of two things
// happens:
//  * If the child result is fully materialized, the BIND is applied to its
//    single `IdTable` and a fully materialized result is returned.
//  * Otherwise a generator is returned that applies the BIND to each `IdTable`
//    yielded by the child result, one at a time.
ProtoResult Bind::computeResult(bool requestLaziness) {
LOG(DEBUG) << "Get input to BIND operation..." << std::endl;
std::shared_ptr<const Result> subRes = _subtree->getResult(requestLaziness);
LOG(DEBUG) << "Got input to Bind operation." << std::endl;

// Helper that applies the BIND to a single `idTable`. It turns the runtime
// pair (input width, result width) into template arguments via
// `callFixedSize` and then calls `computeExpressionBind`.
// `subRes` is captured by value, so a copy of this lambda (e.g. the one moved
// into the generator below) holds its own shared pointer to the child result.
auto applyBind = [this, subRes](auto&& idTable, LocalVocab* localVocab) {
size_t inwidth = idTable.numColumns();
size_t outwidth = getResultWidth();
return ad_utility::callFixedSize(
(std::array{inwidth, outwidth}),
[this, &subRes, localVocab,
&idTable]<size_t IN_WIDTH, size_t OUT_WIDTH>() {
// NOTE(review): `AD_FWD` is presumably a perfect-forwarding macro, so
// the value category and constness of `idTable` are passed on to
// `computeExpressionBind` -- confirm against its definition.
return computeExpressionBind<IN_WIDTH, OUT_WIDTH>(
localVocab, AD_FWD(idTable), subRes->localVocab(),
_bind._expression.getPimpl());
});
};

// Case 1: The child result is fully materialized. `subRes->idTable()` is
// passed to `applyBind` directly and the result is returned as a single table
// together with the (copied and possibly extended) local vocab.
if (subRes->isFullyMaterialized()) {
// Make a deep copy of the local vocab from `subRes` and then add to it (in
// case BIND adds a new word or words).
//
// TODO: In most BIND operations, nothing is added to the local vocabulary,
// so it would be more efficient to first share the pointer here (like with
// `shareLocalVocabFrom`) and only copy it when a new word is about to be
// added. Same for GROUP BY.
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
LocalVocab localVocab = subRes->getCopyOfLocalVocab();
IdTable result = applyBind(subRes->idTable(), &localVocab);
LOG(DEBUG) << "BIND result computation done." << std::endl;
return {std::move(result), resultSortedOn(), std::move(localVocab)};
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
}
// Case 2: The child result is not fully materialized. The local vocab starts
// out empty and is shared (via `shared_ptr`) between the generator below and
// the returned `ProtoResult`.
auto localVocab = std::make_shared<LocalVocab>();
// The lambda has no captures; everything it needs is handed over as by-value
// parameters (the shared vocab, a moved `applyBind`, and the moved `subRes`),
// so the returned generator does not refer to locals of this function.
auto generator =
[](std::shared_ptr<LocalVocab> vocab, auto applyBind,
std::shared_ptr<const Result> result) -> cppcoro::generator<IdTable> {
// Each table is passed as a non-const lvalue, which selects the non-const
// branch of `computeExpressionBind`.
for (IdTable& idTable : result->idTables()) {
co_yield applyBind(idTable, vocab.get());
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
}
// After the last table was yielded, replace the shared vocab by the merge
// of itself and the local vocab of the child result.
std::array<const LocalVocab*, 2> vocabs{vocab.get(), &result->localVocab()};
*vocab = LocalVocab::merge(std::span{vocabs});
}(localVocab, std::move(applyBind), std::move(subRes));
return {std::move(generator), resultSortedOn(), std::move(localVocab)};
}

// _____________________________________________________________________________
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void Bind::computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const Result& inputResultTable,
IdTable Bind::computeExpressionBind(
LocalVocab* outputLocalVocab,
ad_utility::SimilarTo<IdTable> auto&& inputIdTable,
const LocalVocab& inputLocalVocab,
sparqlExpression::SparqlExpression* expression) const {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
inputResultTable.localVocab(), cancellationHandle_, deadline_);
*getExecutionContext(), _subtree->getVariableColumns(), inputIdTable,
getExecutionContext()->getAllocator(), inputLocalVocab,
cancellationHandle_, deadline_);

sparqlExpression::ExpressionResult expressionResult =
expression->evaluate(&evaluationContext);

const auto input = inputResultTable.idTable().asStaticView<IN_WIDTH>();
auto output = std::move(*outputIdTable).toStatic<OUT_WIDTH>();

// first initialize the first columns (they remain identical)
const auto inSize = input.size();
output.reserve(inSize);
const auto inCols = input.numColumns();
// copy the input to the first numColumns;
for (size_t i = 0; i < inSize; ++i) {
output.emplace_back();
for (size_t j = 0; j < inCols; ++j) {
output(i, j) = input(i, j);
size_t inSize = inputIdTable.size();
size_t inCols = inputIdTable.numColumns();

auto output = [this, &inputIdTable, inSize, inCols]() {
if constexpr (std::is_const_v<
std::remove_reference_t<decltype(inputIdTable)>>) {
const auto input = inputIdTable.template asStaticView<IN_WIDTH>();
auto output =
IdTable{getResultWidth(), getExecutionContext()->getAllocator()}
.template toStatic<OUT_WIDTH>();

// first initialize the first columns (they remain identical)
output.reserve(inSize);
// copy the input to the first numColumns;
for (size_t i = 0; i < inSize; ++i) {
output.emplace_back();
for (size_t j = 0; j < inCols; ++j) {
output(i, j) = input(i, j);
}
checkCancellation();
}
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
return output;
} else {
(void)this;
(void)inSize;
(void)inCols;
IdTable output = std::move(inputIdTable);
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
output.addEmptyColumn();
return std::move(output).toStatic<OUT_WIDTH>();
}
checkCancellation();
}
}();

auto visitor = [&]<sparqlExpression::SingleExpressionResult T>(
T&& singleResult) mutable {
Expand Down Expand Up @@ -190,5 +221,5 @@ void Bind::computeExpressionBind(

std::visit(visitor, std::move(expressionResult));

*outputIdTable = std::move(output).toDynamic();
return std::move(output).toDynamic();
}
14 changes: 6 additions & 8 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
// Created by johannes on 19.04.20.
//

#ifndef QLEVER_BIND_H
#define QLEVER_BIND_H
#pragma once

#include "engine/Operation.h"
#include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
Expand Down Expand Up @@ -46,16 +45,15 @@ class Bind : public Operation {
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

private:
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
ProtoResult computeResult(bool requestLaziness) override;

// Implementation for the binding of arbitrary expressions.
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const Result& inputResultTable,
IdTable computeExpressionBind(
LocalVocab* outputLocalVocab,
ad_utility::SimilarTo<IdTable> auto&& inputIdTable,
const LocalVocab& inputLocalVocab,
sparqlExpression::SparqlExpression* expression) const;

[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
};

#endif // QLEVER_BIND_H
6 changes: 6 additions & 0 deletions src/engine/idTable/IdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,12 @@ class IdTable {
data().resize(numColumns, ColumnStorage{allocator_});
}

// Append one additional column to the table. The new column is created with
// as many entries as the table currently has rows and uses the table's
// allocator. This function does not assign any values to the new entries, so
// callers should fill the column before reading from it.
void addEmptyColumn() requires columnsAreAllocatable && isDynamic {
  const auto currentNumRows = size();
  data().emplace_back(currentNumRows, allocator_);
  numColumns_ += 1;
}

// The number of rows in the table. We deliberately have an explicitly named
// function `numRows` as well as a generic `size` function because the latter
// can be used to write generic code, for example when using STL algorithms on
Expand Down
17 changes: 17 additions & 0 deletions test/IdTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,23 @@ TEST(IdTable, constructorsAreSfinaeFriendly) {
static_assert(std::is_constructible_v<IntTable, size_t>);
}

// _____________________________________________________________________________
// Check that `addEmptyColumn` appends exactly one column and leaves the row
// count as well as the contents of the existing column untouched.
TEST(IdTable, addEmptyColumn) {
  using ::testing::ElementsAre;
  IdTable table{1, ad_utility::makeUnlimitedAllocator<Id>()};
  table.push_back({V(1)});
  table.push_back({V(2)});

  table.addEmptyColumn();

  // One more column, same number of rows, original column unchanged.
  EXPECT_EQ(table.numColumns(), 2);
  EXPECT_EQ(table.numRows(), 2);
  EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2)));
  // The new column is uninitialized, so we can't make any more specific
  // assertions about its content here.
  EXPECT_EQ(table.getColumn(1).size(), 2);
}

// Check that we can completely instantiate `IdTable`s with a different value
// type and a different underlying storage.

Expand Down
116 changes: 116 additions & 0 deletions test/engine/BindTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Robin Textor-Falconi <[email protected]>

#include <gtest/gtest.h>

#include "../util/IdTableHelpers.h"
#include "../util/IndexTestHelpers.h"
#include "./ValuesForTesting.h"
#include "engine/Bind.h"
#include "engine/sparqlExpressions/LiteralExpression.h"

using namespace sparqlExpression;
using Vars = std::vector<std::optional<Variable>>;

namespace {
// Build a `Bind` operation on top of a `ValuesForTesting` child that serves
// `idTable` with its single column bound to `?a`. The BIND itself is
// `?a as ?b`, i.e. it evaluates the variable `?a` and binds the value to the
// new variable `?b`.
Bind makeBindForIdTable(QueryExecutionContext* qec, IdTable idTable) {
  Vars inputVariables{Variable{"?a"}};
  auto inputTree = ad_utility::makeExecutionTree<ValuesForTesting>(
      qec, std::move(idTable), std::move(inputVariables));
  return Bind{qec,
              std::move(inputTree),
              {SparqlExpressionPimpl{
                   std::make_unique<VariableExpression>(Variable{"?a"}),
                   "?a as ?b"},
               Variable{"?b"}}};
}

// Consume `generator` and return the one `IdTable` it is expected to yield.
// If a second table is yielded, a non-fatal test failure is recorded,
// iteration stops, and the first table is returned. If no table is yielded at
// all, a `std::runtime_error` is thrown.
IdTable getSingleIdTable(cppcoro::generator<IdTable>& generator) {
  std::optional<IdTable> onlyTable;
  for (IdTable& current : generator) {
    if (onlyTable.has_value()) {
      ADD_FAILURE() << "More than one IdTable was generated";
      break;
    }
    onlyTable.emplace(std::move(current));
  }
  if (onlyTable.has_value()) {
    return std::move(onlyTable).value();
  }
  throw std::runtime_error{"No IdTable was generated"};
}
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
} // namespace

// _____________________________________________________________________________
// BIND (?a as ?b) on a table with four rows: both the fully materialized and
// the lazy computation must yield the input column duplicated into a second
// column.
TEST(Bind, computeResult) {
  auto* qec = ad_utility::testing::getQec();
  Bind bind =
      makeBindForIdTable(qec, makeIdTableFromVector({{1}, {2}, {3}, {4}}));
  const IdTable expected =
      makeIdTableFromVector({{1, 1}, {2, 2}, {3, 3}, {4, 4}});

  // Clear the query cache first so that each call really computes the result
  // in the requested mode.
  auto computeFresh = [&](ComputationMode mode) {
    qec->getQueryTreeCache().clearAll();
    return bind.getResult(false, mode);
  };

  {
    auto materialized = computeFresh(ComputationMode::FULLY_MATERIALIZED);
    ASSERT_TRUE(materialized->isFullyMaterialized());
    EXPECT_EQ(materialized->idTable(), expected);
  }

  {
    auto lazy = computeFresh(ComputationMode::LAZY_IF_SUPPORTED);
    ASSERT_FALSE(lazy->isFullyMaterialized());
    EXPECT_EQ(getSingleIdTable(lazy->idTables()), expected);
  }
}

// _____________________________________________________________________________
// BIND on an input with one column but no rows: the result must be an empty
// table with two columns, in both computation modes.
TEST(Bind, computeResultWithTableWithoutRows) {
  auto* qec = ad_utility::testing::getQec();
  Bind bind = makeBindForIdTable(
      qec, IdTable{1, ad_utility::makeUnlimitedAllocator<Id>()});
  const IdTable expectedEmpty{2, ad_utility::makeUnlimitedAllocator<Id>()};

  // Clear the query cache first so that each call really computes the result
  // in the requested mode.
  auto computeFresh = [&](ComputationMode mode) {
    qec->getQueryTreeCache().clearAll();
    return bind.getResult(false, mode);
  };

  {
    auto materialized = computeFresh(ComputationMode::FULLY_MATERIALIZED);
    ASSERT_TRUE(materialized->isFullyMaterialized());
    EXPECT_EQ(materialized->idTable(), expectedEmpty);
  }

  {
    auto lazy = computeFresh(ComputationMode::LAZY_IF_SUPPORTED);
    ASSERT_FALSE(lazy->isFullyMaterialized());
    EXPECT_EQ(getSingleIdTable(lazy->idTables()), expectedEmpty);
  }
}

// _____________________________________________________________________________
// BIND of the constant 42 on an input with two rows but no columns: the result
// must consist of a single column that holds the constant in each row, in both
// computation modes.
TEST(Bind, computeResultWithTableWithoutColumns) {
  auto val = Id::makeFromInt(42);
  auto* qec = ad_utility::testing::getQec();
  auto inputTree = ad_utility::makeExecutionTree<ValuesForTesting>(
      qec, makeIdTableFromVector({{}, {}}), Vars{});
  Bind bind{
      qec,
      std::move(inputTree),
      {SparqlExpressionPimpl{std::make_unique<IdExpression>(val), "42 as ?b"},
       Variable{"?b"}}};
  const IdTable expected = makeIdTableFromVector({{val}, {val}});

  // Clear the query cache first so that each call really computes the result
  // in the requested mode.
  auto computeFresh = [&](ComputationMode mode) {
    qec->getQueryTreeCache().clearAll();
    return bind.getResult(false, mode);
  };

  {
    auto materialized = computeFresh(ComputationMode::FULLY_MATERIALIZED);
    ASSERT_TRUE(materialized->isFullyMaterialized());
    EXPECT_EQ(materialized->idTable(), expected);
  }

  {
    auto lazy = computeFresh(ComputationMode::LAZY_IF_SUPPORTED);
    ASSERT_FALSE(lazy->isFullyMaterialized());
    EXPECT_EQ(getSingleIdTable(lazy->idTables()), expected);
  }
}
1 change: 1 addition & 0 deletions test/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ addLinkAndDiscoverTest(SpatialJoinTest engine)
addLinkAndDiscoverTest(DistinctTest engine)
addLinkAndDiscoverTest(GroupByHashMapOptimizationTest)
addLinkAndDiscoverTest(LazyGroupByTest engine)
addLinkAndDiscoverTest(BindTest engine)
Loading