Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement lazy BIND #1543

Merged
merged 11 commits into from
Oct 15, 2024
147 changes: 81 additions & 66 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,104 +81,119 @@
}

// _____________________________________________________________________________
ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) {
using std::endl;
LOG(DEBUG) << "Get input to BIND operation..." << endl;
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Got input to Bind operation." << endl;
IdTable idTable{getExecutionContext()->getAllocator()};

idTable.setNumColumns(getResultWidth());

// Make a deep copy of the local vocab from `subRes` and then add to it (in
// case BIND adds a new word or words).
//
// TODO: In most BIND operations, nothing is added to the local vocabulary, so
// it would be more efficient to first share the pointer here (like with
// `shareLocalVocabFrom`) and only copy it when a new word is about to be
// added. Same for GROUP BY.
auto localVocab = subRes->getCopyOfLocalVocab();

size_t inwidth = subRes->idTable().numColumns();
size_t outwidth = getResultWidth();

CALL_FIXED_SIZE((std::array{inwidth, outwidth}), &Bind::computeExpressionBind,
this, &idTable, &localVocab, *subRes,
_bind._expression.getPimpl());

LOG(DEBUG) << "BIND result computation done." << endl;
return {std::move(idTable), resultSortedOn(), std::move(localVocab)};
// Return a newly allocated `IdTable` that holds a copy of the rows
// `[subrange.first, subrange.second)` of `idTable`. The copy has the same
// number of columns and uses the same allocator as `idTable`. The caller has to
// make sure that `subrange.first <= subrange.second` and that both are valid
// row offsets into `idTable`; no bounds checks are performed here.
IdTable Bind::cloneSubView(const IdTable& idTable,
                           const std::pair<size_t, size_t>& subrange) {
  const auto& [beginRow, endRow] = subrange;
  IdTable chunk(idTable.numColumns(), idTable.getAllocator());
  // Allocate all rows up front, then overwrite them with the requested rows.
  chunk.resize(endRow - beginRow);
  const auto firstRow = idTable.begin() + beginRow;
  const auto pastLastRow = idTable.begin() + endRow;
  std::ranges::copy(firstRow, pastLastRow, chunk.begin());
  return chunk;
}

// _____________________________________________________________________________
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void Bind::computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const {
// Compute the result of the BIND: obtain the result of the subtree and let
// `computeExpressionBind` process each input table. `requestLaziness` is
// forwarded to the subtree. Three cases are distinguished below:
// 1. The input is fully materialized, laziness was requested, and the input has
//    more than `CHUNK_SIZE` rows: the result is a generator that yields the
//    input in chunks of at most `CHUNK_SIZE` rows.
// 2. The input is fully materialized otherwise: the result is one fully
//    materialized table.
// 3. The input is not fully materialized: the result is a generator that
//    yields one output table per input table.
ProtoResult Bind::computeResult(bool requestLaziness) {
LOG(DEBUG) << "Get input to BIND operation..." << std::endl;
std::shared_ptr<const Result> subRes = _subtree->getResult(requestLaziness);
LOG(DEBUG) << "Got input to Bind operation." << std::endl;

// Helper that evaluates the BIND expression for a single `IdTable`. It holds
// its own copy of the `subRes` pointer, so it remains usable after `subRes`
// itself has been moved from below. It also captures `this`.
// NOTE(review): because of the `this` capture, the generators returned below
// presumably must not outlive this `Bind` operation -- confirm.
auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) {
return computeExpressionBind(localVocab, std::move(idTable),
subRes->localVocab(),
_bind._expression.getPimpl());
};

if (subRes->isFullyMaterialized()) {
// Case 1: materialized input, but the caller asked for a lazy result and the
// input is large enough to be split into several chunks.
if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) {
// All chunks share one local vocab. It starts as a copy of the input's local
// vocab and is also handed to the returned result.
auto localVocab =
std::make_shared<LocalVocab>(subRes->getCopyOfLocalVocab());
// The coroutine lambda has no captures; all of its state is passed as by-value
// parameters. Coroutine parameters are copied into the coroutine frame, so
// they stay alive for as long as the generator is being consumed.
auto generator = [](std::shared_ptr<LocalVocab> vocab, auto applyBind,
std::shared_ptr<const Result> result)
-> cppcoro::generator<IdTable> {
size_t size = result->idTable().size();
// Each step clones the rows `[offset, min(size, offset + CHUNK_SIZE))`, so the
// last chunk may contain fewer than `CHUNK_SIZE` rows.
for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) {
co_yield applyBind(
cloneSubView(result->idTable(),
{offset, std::min(size, offset + CHUNK_SIZE)}),
vocab.get());
}
}(localVocab, std::move(applyBind), std::move(subRes));
return {std::move(generator), resultSortedOn(), std::move(localVocab)};
}
// Case 2: fully materialized result.
// Make a copy of the local vocab from `subRes` and then add to it (in case
// BIND adds new words). Note: The copy of the local vocab is shallow via
// `shared_ptr`s, so the following is also efficient if the BIND adds no new
// words.
LocalVocab localVocab = subRes->getCopyOfLocalVocab();
// The input table is cloned as a whole, the clone is passed to `applyBind`.
IdTable result = applyBind(subRes->idTable().clone(), &localVocab);
LOG(DEBUG) << "BIND result computation done." << std::endl;
return {std::move(result), resultSortedOn(), std::move(localVocab)};
}
// Case 3: the input is not fully materialized. The output local vocab starts
// out empty and is shared between the generator and the returned result.
auto localVocab = std::make_shared<LocalVocab>();
// As above, the coroutine lambda receives all of its state via by-value
// parameters instead of captures.
auto generator =
[](std::shared_ptr<LocalVocab> vocab, auto applyBind,
std::shared_ptr<const Result> result) -> cppcoro::generator<IdTable> {
// The input tables are moved out of the input, so no copies are made here.
for (IdTable& idTable : result->idTables()) {
co_yield applyBind(std::move(idTable), vocab.get());
}
// Only after all input tables have been processed, the local vocab of the
// input is merged into the output vocab.
// NOTE(review): presumably the local vocab of a lazy input is only complete
// once the input has been consumed completely -- confirm.
std::array<const LocalVocab*, 2> vocabs{vocab.get(), &result->localVocab()};
*vocab = LocalVocab::merge(std::span{vocabs});
}(localVocab, std::move(applyBind), std::move(subRes));
return {std::move(generator), resultSortedOn(), std::move(localVocab)};
}

// _____________________________________________________________________________
IdTable Bind::computeExpressionBind(
LocalVocab* outputLocalVocab, IdTable idTable,
const LocalVocab& inputLocalVocab,
const sparqlExpression::SparqlExpression* expression) const {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
inputResultTable.localVocab(), cancellationHandle_, deadline_);
*getExecutionContext(), _subtree->getVariableColumns(), idTable,
getExecutionContext()->getAllocator(), inputLocalVocab,
cancellationHandle_, deadline_);

sparqlExpression::ExpressionResult expressionResult =
expression->evaluate(&evaluationContext);

const auto input = inputResultTable.idTable().asStaticView<IN_WIDTH>();
auto output = std::move(*outputIdTable).toStatic<OUT_WIDTH>();

// first initialize the first columns (they remain identical)
const auto inSize = input.size();
output.reserve(inSize);
const auto inCols = input.numColumns();
// copy the input to the first numColumns;
for (size_t i = 0; i < inSize; ++i) {
output.emplace_back();
for (size_t j = 0; j < inCols; ++j) {
output(i, j) = input(i, j);
}
checkCancellation();
}
idTable.addEmptyColumn();
auto outputColumn = idTable.getColumn(idTable.numColumns() - 1);

auto visitor = [&]<sparqlExpression::SingleExpressionResult T>(
T&& singleResult) mutable {
constexpr static bool isVariable = std::is_same_v<T, ::Variable>;
constexpr static bool isStrongId = std::is_same_v<T, Id>;

if constexpr (isVariable) {
auto column =
auto columnIndex =
getInternallyVisibleVariableColumns().at(singleResult).columnIndex_;
for (size_t i = 0; i < inSize; ++i) {
output(i, inCols) = output(i, column);
checkCancellation();
}
auto inputColumn = idTable.getColumn(columnIndex);
AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size());
std::ranges::copy(inputColumn, outputColumn.begin());
} else if constexpr (isStrongId) {
for (size_t i = 0; i < inSize; ++i) {
output(i, inCols) = singleResult;
checkCancellation();
}
std::ranges::fill(outputColumn, singleResult);
} else {
constexpr bool isConstant = sparqlExpression::isConstantResult<T>;

auto resultGenerator = sparqlExpression::detail::makeGenerator(
std::forward<T>(singleResult), inSize, &evaluationContext);
std::forward<T>(singleResult), outputColumn.size(),
&evaluationContext);

if constexpr (isConstant) {
auto it = resultGenerator.begin();
if (it != resultGenerator.end()) {
Id constantId =
sparqlExpression::detail::constantExpressionResultToId(
std::move(*it), *outputLocalVocab);
for (size_t i = 0; i < inSize; ++i) {
output(i, inCols) = constantId;
checkCancellation();
}
checkCancellation();
std::ranges::fill(outputColumn, constantId);
}
} else {
size_t i = 0;
// We deliberately move the values from the generator.
for (auto& resultValue : resultGenerator) {
output(i, inCols) =
outputColumn[i] =

Check warning on line 196 in src/engine/Bind.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/Bind.cpp#L196

Added line #L196 was not covered by tests
sparqlExpression::detail::constantExpressionResultToId(
std::move(resultValue), *outputLocalVocab);
i++;
Expand All @@ -190,5 +205,5 @@

std::visit(visitor, std::move(expressionResult));

*outputIdTable = std::move(output).toDynamic();
return idTable;
}
32 changes: 14 additions & 18 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
//
// Created by johannes on 19.04.20.
//
// Copyright 2020, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#ifndef QLEVER_BIND_H
#define QLEVER_BIND_H
#pragma once

#include "engine/Operation.h"
#include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
Expand All @@ -12,6 +11,8 @@
/// BIND operation, currently only supports a very limited subset of expressions
class Bind : public Operation {
public:
static constexpr size_t CHUNK_SIZE = 10'000;

Bind(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> subtree,
parsedQuery::Bind b)
: Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {}
Expand All @@ -37,25 +38,20 @@ class Bind : public Operation {
float getMultiplicity(size_t col) override;
bool knownEmptyResult() override;

// Returns the variable to which the expression will be bound
[[nodiscard]] const string& targetVariable() const {
return _bind._target.name();
}

protected:
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

private:
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
ProtoResult computeResult(bool requestLaziness) override;

static IdTable cloneSubView(const IdTable& idTable,
const std::pair<size_t, size_t>& subrange);

// Implementation for the binding of arbitrary expressions.
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const;
IdTable computeExpressionBind(
LocalVocab* outputLocalVocab, IdTable idTable,
const LocalVocab& inputLocalVocab,
const sparqlExpression::SparqlExpression* expression) const;

[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
};

#endif // QLEVER_BIND_H
6 changes: 6 additions & 0 deletions src/engine/idTable/IdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,12 @@ class IdTable {
data().resize(numColumns, ColumnStorage{allocator_});
}

// Append one additional column after the existing columns. The new column has
// one entry per row that is currently in the table, and the column count is
// increased by one. Callers should not rely on the initial contents of the new
// column, but are expected to overwrite them. Only available for dynamic
// tables whose columns can be allocated.
void addEmptyColumn() requires columnsAreAllocatable && isDynamic {
  const auto currentNumRows = size();
  data().emplace_back(currentNumRows, allocator_);
  numColumns_ += 1;
}

// The number of rows in the table. We deliberately have an explicitly named
// function `numRows` as well as a generic `size` function because the latter
// can be used to write generic code, for example when using STL algorithms on
Expand Down
17 changes: 17 additions & 0 deletions test/IdTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,23 @@ TEST(IdTable, constructorsAreSfinaeFriendly) {
static_assert(std::is_constructible_v<IntTable, size_t>);
}

// _____________________________________________________________________________
// Check that `addEmptyColumn` appends exactly one column, keeps the contents of
// the already existing columns, and gives the new column one entry per row.
TEST(IdTable, addEmptyColumn) {
  using ::testing::ElementsAre;
  IdTable table{1, ad_utility::makeUnlimitedAllocator<Id>()};
  table.push_back({V(1)});
  table.push_back({V(2)});

  table.addEmptyColumn();

  // This is an `ASSERT` (not an `EXPECT`) because accessing column 1 below is
  // only valid if the column has actually been added.
  ASSERT_EQ(table.numColumns(), 2);
  EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2)));
  // The new column is uninitialized, so we can't make any more specific
  // assertions about its content here.
  EXPECT_EQ(table.getColumn(1).size(), 2);
}

// Check that we can completely instantiate `IdTable`s with a different value
// type and a different underlying storage.

Expand Down
Loading
Loading