Skip to content

Commit

Permalink
Provide framework for generic lazily evaluated operation results (#1350)
Browse files Browse the repository at this point in the history
QLever now supports lazy operations, that can produce their result in batches instead of fully materializing them. This PR implements the general infrastructure needed for lazy evaluation, and provides a lazy implementation of the `IndexScan` and `Filter` operation. This means that queries that consist of a single triple and one or more subsequent FILTER clauses will not materialize the full index scan.
  • Loading branch information
RobinTF authored Aug 22, 2024
1 parent 8cd8a91 commit f986808
Show file tree
Hide file tree
Showing 29 changed files with 2,639 additions and 435 deletions.
303 changes: 169 additions & 134 deletions src/engine/ExportQueryExecutionTrees.cpp

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,34 @@ class ExportQueryExecutionTrees {
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle);

// Helper type that contains an `IdTable` and a view with related indices to
// access the `IdTable` with.
struct TableWithRange {
const IdTable& idTable_;
std::ranges::iota_view<uint64_t, uint64_t> view_;
};

// Yield all `IdTables` provided by the given `result`.
static cppcoro::generator<const IdTable&> getIdTables(const Result& result);

// Return a range that contains the indices of the rows that have to be
// exported from the `idTable` given the `LimitOffsetClause`. It takes into
// account the LIMIT, the OFFSET, and the actual size of the `idTable`
static cppcoro::generator<TableWithRange> getRowIndices(
LimitOffsetClause limitOffset, const Result& result);

FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator);
FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator);
FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable);
FRIEND_TEST(ExportQueryExecutionTrees,
ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped);
FRIEND_TEST(ExportQueryExecutionTrees,
ensureCorrectSlicingOfIdTablesWhenLastIsSkipped);
FRIEND_TEST(ExportQueryExecutionTrees,
ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial);
FRIEND_TEST(ExportQueryExecutionTrees,
ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial);
FRIEND_TEST(ExportQueryExecutionTrees,
ensureGeneratorIsNotConsumedWhenNotRequired);
};
62 changes: 40 additions & 22 deletions src/engine/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,41 +43,59 @@ string Filter::getDescriptor() const {
}

// _____________________________________________________________________________
ProtoResult Filter::computeResult([[maybe_unused]] bool requestLaziness) {
ProtoResult Filter::computeResult(bool requestLaziness) {
LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl;
std::shared_ptr<const Result> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult(requestLaziness);
LOG(DEBUG) << "Filter result computation..." << endl;
checkCancellation();

IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(subRes->idTable().numColumns());
if (subRes->isFullyMaterialized()) {
IdTable result = filterIdTable(subRes, subRes->idTable());
LOG(DEBUG) << "Filter result computation done." << endl;

return {std::move(result), resultSortedOn(), subRes->getSharedLocalVocab()};
}
auto localVocab = subRes->getSharedLocalVocab();
return {[](auto subRes, auto* self) -> cppcoro::generator<IdTable> {
for (IdTable& idTable : subRes->idTables()) {
IdTable result = self->filterIdTable(subRes, idTable);
co_yield result;
}
}(std::move(subRes), this),
resultSortedOn(), std::move(localVocab)};
}

size_t width = idTable.numColumns();
CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, &idTable, *subRes);
LOG(DEBUG) << "Filter result computation done." << endl;
checkCancellation();
// _____________________________________________________________________________
IdTable Filter::filterIdTable(const std::shared_ptr<const Result>& subRes,
const IdTable& idTable) {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(), idTable,
getExecutionContext()->getAllocator(), subRes->localVocab(),
cancellationHandle_, deadline_);

return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()};
// TODO<joka921> This should be a mandatory argument to the
// EvaluationContext constructor.
evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy();

size_t width = evaluationContext._inputTable.numColumns();
IdTable result = CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this,
evaluationContext);
checkCancellation();
return result;
}

// _____________________________________________________________________________
template <size_t WIDTH>
void Filter::computeFilterImpl(IdTable* outputIdTable,
const Result& inputResultTable) {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
inputResultTable.localVocab(), cancellationHandle_, deadline_);

// TODO<joka921> This should be a mandatory argument to the EvaluationContext
// constructor.
evaluationContext._columnsByWhichResultIsSorted = inputResultTable.sortedBy();
IdTable Filter::computeFilterImpl(
sparqlExpression::EvaluationContext& evaluationContext) {
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(evaluationContext._inputTable.numColumns());

sparqlExpression::ExpressionResult expressionResult =
_expression.getPimpl()->evaluate(&evaluationContext);

const auto input = inputResultTable.idTable().asStaticView<WIDTH>();
auto output = std::move(*outputIdTable).toStatic<WIDTH>();
const auto input = evaluationContext._inputTable.asStaticView<WIDTH>();
auto output = std::move(idTable).toStatic<WIDTH>();
// Clang 17 seems to incorrectly deduce the type, so try to trick it
std::remove_const_t<decltype(output)>& output2 = output;

Expand Down Expand Up @@ -123,7 +141,7 @@ void Filter::computeFilterImpl(IdTable* outputIdTable,

std::visit(visitor, std::move(expressionResult));

*outputIdTable = std::move(output).toDynamic();
return std::move(output).toDynamic();
}

// _____________________________________________________________________________
Expand Down
12 changes: 9 additions & 3 deletions src/engine/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,15 @@ class Filter : public Operation {
return _subtree->getVariableColumns();
}

ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
ProtoResult computeResult(bool requestLaziness) override;

// Perform the actual filter operation of the data provided by
// `evaluationContext`.
template <size_t WIDTH>
void computeFilterImpl(IdTable* outputIdTable,
const Result& inputResultTable);
IdTable computeFilterImpl(
sparqlExpression::EvaluationContext& evaluationContext);

// Run `computeFilterImpl` on the provided IdTable
IdTable filterIdTable(const std::shared_ptr<const Result>& subRes,
const IdTable& idTable);
};
21 changes: 20 additions & 1 deletion src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,28 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const {
std::ranges::for_each(additionalVariables_, addCol);
return variableToColumnMap;
}

// _____________________________________________________________________________
ProtoResult IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
cppcoro::generator<IdTable> IndexScan::scanInChunks() const {
auto metadata = getMetadataForScan(*this);
if (!metadata.has_value()) {
co_return;
}
auto blocksSpan =
CompressedRelationReader::getBlocksFromMetadata(metadata.value());
std::vector<CompressedBlockMetadata> blocks{blocksSpan.begin(),
blocksSpan.end()};
for (IdTable& idTable : getLazyScan(*this, std::move(blocks))) {
co_yield std::move(idTable);
}
}

// _____________________________________________________________________________
ProtoResult IndexScan::computeResult(bool requestLaziness) {
LOG(DEBUG) << "IndexScan result computation...\n";
if (requestLaziness) {
return {scanInChunks(), resultSortedOn(), LocalVocab{}};
}
IdTable idTable{getExecutionContext()->getAllocator()};

using enum Permutation::Enum;
Expand Down
4 changes: 3 additions & 1 deletion src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class IndexScan final : public Operation {
ScanSpecificationAsTripleComponent getScanSpecification() const;

private:
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
ProtoResult computeResult(bool requestLaziness) override;

vector<QueryExecutionTree*> getChildren() override { return {}; }

Expand All @@ -115,6 +115,8 @@ class IndexScan final : public Operation {

VariableToColumnMap computeVariableToColumnMap() const override;

cppcoro::generator<IdTable> scanInChunks() const;

// Helper functions for the public `getLazyScanFor...` functions (see above).
static Permutation::IdTableGenerator getLazyScan(
const IndexScan& s, std::vector<CompressedBlockMetadata> blocks);
Expand Down
Loading

0 comments on commit f986808

Please sign in to comment.