Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring preliminaries for lazy operations (Part 1) #1352

Merged
merged 11 commits into from
May 23, 2024
6 changes: 3 additions & 3 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ std::vector<QueryExecutionTree*> Bind::getChildren() {
}

// _____________________________________________________________________________
ResultTable Bind::computeResult() {
Result Bind::computeResult([[maybe_unused]] bool requestLaziness) {
using std::endl;
LOG(DEBUG) << "Get input to BIND operation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Got input to Bind operation." << endl;
IdTable idTable{getExecutionContext()->getAllocator()};

Expand Down Expand Up @@ -114,7 +114,7 @@ ResultTable Bind::computeResult() {
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void Bind::computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const ResultTable& inputResultTable,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ class Bind : public Operation {
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;

private:
ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

// Implementation for the binding of arbitrary expressions.
template <size_t IN_WIDTH, size_t OUT_WIDTH>
void computeExpressionBind(
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
const ResultTable& inputResultTable,
const Result& inputResultTable,
sparqlExpression::SparqlExpression* expression) const;

[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_subdirectory(sparqlExpressions)
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp)
qlever_target_link_libraries(SortPerformanceEstimator)
add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp ResultTable.cpp LocalVocab.cpp
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Distinct.cpp OrderBy.cpp Filter.cpp
Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
Expand Down
16 changes: 9 additions & 7 deletions src/engine/CartesianProductJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,11 @@ void CartesianProductJoin::writeResultColumn(std::span<Id> targetColumn,
}
}
// ____________________________________________________________________________
ResultTable CartesianProductJoin::computeResult() {
Result CartesianProductJoin::computeResult(
[[maybe_unused]] bool requestLaziness) {
IdTable result{getExecutionContext()->getAllocator()};
result.setNumColumns(getResultWidth());
std::vector<std::shared_ptr<const ResultTable>> subResults;
std::vector<std::shared_ptr<const Result>> subResults;

// We don't need to fully materialize the child results if we have a LIMIT
// specified and an OFFSET of 0.
Expand All @@ -154,21 +155,22 @@ ResultTable CartesianProductJoin::computeResult() {
}
subResults.push_back(child.getResult());
// Early stopping: If one of the results is empty, we can stop early.
if (subResults.back()->size() == 0) {
if (subResults.back()->idTable().size() == 0) {
break;
}
// Example for the following calculation: If we have a LIMIT of 1000 and
// the first child already has a result of size 100, then the second child
// needs to evaluate only its first 10 results. The +1 is because integer
// divisions are rounded down by default.
if (limitIfPresent.has_value()) {
limitIfPresent.value()._limit =
limitIfPresent.value()._limit.value() / subResults.back()->size() + 1;
limitIfPresent.value()._limit = limitIfPresent.value()._limit.value() /
subResults.back()->idTable().size() +
1;
}
}

auto sizesView = std::views::transform(
subResults, [](const auto& child) { return child->size(); });
subResults, [](const auto& child) { return child->idTable().size(); });
auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(),
1UL, std::multiplies{});

Expand Down Expand Up @@ -210,7 +212,7 @@ ResultTable CartesianProductJoin::computeResult() {
auto subResultsDeref = std::views::transform(
subResults, [](auto& x) -> decltype(auto) { return *x; });
return {std::move(result), resultSortedOn(),
ResultTable::getMergedLocalVocab(subResultsDeref)};
Result::getMergedLocalVocab(subResultsDeref)};
}

// ____________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CartesianProductJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class CartesianProductJoin : public Operation {

private:
//! Compute the result of the query-subtree rooted at this element..
ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

// Copy each element from the `inputColumn` `groupSize` times to the
// `targetColumn`. Repeat until the `targetColumn` is copletely filled. Skip
Expand Down
5 changes: 3 additions & 2 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ size_t CountAvailablePredicates::getCostEstimate() {
}

// _____________________________________________________________________________
ResultTable CountAvailablePredicates::computeResult() {
Result CountAvailablePredicates::computeResult(
[[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl;
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(2);
Expand Down Expand Up @@ -137,7 +138,7 @@ ResultTable CountAvailablePredicates::computeResult() {
patterns);
return {std::move(idTable), resultSortedOn(), LocalVocab{}};
} else {
std::shared_ptr<const ResultTable> subresult = subtree_->getResult();
std::shared_ptr<const Result> subresult = subtree_->getResult();
LOG(DEBUG) << "CountAvailablePredicates subresult computation done."
<< std::endl;

Expand Down
6 changes: 3 additions & 3 deletions src/engine/CountAvailablePredicates.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
using std::string;
using std::vector;

// This Operation takes a ResultTable with at least one column containing ids,
// and a column index referring to such a column. It then creates a ResultTable
// This Operation takes a Result with at least one column containing ids,
// and a column index referring to such a column. It then creates a Result
// containing two columns, the first one filled with the ids of all predicates
// for which there is an entry in the index with one of the entities in the
// specified input column as its subject. The second output column contains a
Expand Down Expand Up @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation {
void computePatternTrickAllEntities(
IdTable* result, const CompactVectorOfStrings<Id>& patterns) const;

ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;
[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
};
5 changes: 3 additions & 2 deletions src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <sstream>

#include "engine/CallFixedSize.h"
#include "engine/Engine.h"
#include "engine/QueryExecutionTree.h"

using std::endl;
Expand Down Expand Up @@ -37,10 +38,10 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const {
}

// _____________________________________________________________________________
ResultTable Distinct::computeResult() {
Result Distinct::computeResult([[maybe_unused]] bool requestLaziness) {
IdTable idTable{getExecutionContext()->getAllocator()};
LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();

LOG(DEBUG) << "Distinct result computation..." << endl;
idTable.setNumColumns(subRes->idTable().numColumns());
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Distinct : public Operation {
[[nodiscard]] string getCacheKeyImpl() const override;

private:
virtual ResultTable computeResult() override;
virtual Result computeResult([[maybe_unused]] bool requestLaziness) override;

VariableToColumnMap computeVariableToColumnMap() const override;
};
26 changes: 12 additions & 14 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cppcoro::generator<QueryExecutionTree::StringTriple>
ExportQueryExecutionTrees::constructQueryResultToTriples(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset, std::shared_ptr<const ResultTable> res,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle) {
for (size_t i : getRowIndices(limitAndOffset, res->idTable())) {
ConstructQueryExportContext context{i, *res, qet.getVariableColumns(),
Expand Down Expand Up @@ -57,7 +57,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
resultTable->logResultSize();
auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples(
Expand Down Expand Up @@ -91,8 +91,7 @@ nlohmann::json
ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
const LimitOffsetClause& limitAndOffset,
std::shared_ptr<const ResultTable> res,
const LimitOffsetClause& limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle) {
auto generator = constructQueryResultToTriples(qet, constructTriples,
limitAndOffset, std::move(res),
Expand All @@ -110,7 +109,7 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON(
nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray(
const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
const QueryExecutionTree::ColumnIndicesAndTypes& columns,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
AD_CORRECTNESS_CHECK(resultTable != nullptr);
const IdTable& data = resultTable->idTable();
Expand Down Expand Up @@ -268,7 +267,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
using nlohmann::json;

Expand Down Expand Up @@ -388,7 +387,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
AD_CORRECTNESS_CHECK(resultTable != nullptr);
LOG(DEBUG) << "Resolving strings for finished binary result...\n";
Expand Down Expand Up @@ -418,7 +417,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream(

// This call triggers the possibly expensive computation of the query result
// unless the result is already cached.
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
LOG(DEBUG) << "Converting result IDs to their corresponding strings ..."
<< std::endl;
Expand Down Expand Up @@ -563,7 +562,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
selectClause.getSelectedVariablesAsStrings();
// This call triggers the possibly expensive computation of the query result
// unless the result is already cached.
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();

// In the XML format, the variables don't include the question mark.
auto varsWithoutQuestionMark = std::views::transform(
Expand Down Expand Up @@ -605,8 +604,7 @@ ad_utility::streams::stream_generator
ExportQueryExecutionTrees::constructQueryResultToStream(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle) {
static_assert(format == MediaType::octetStream || format == MediaType::csv ||
format == MediaType::tsv || format == MediaType::sparqlXml);
Expand Down Expand Up @@ -638,11 +636,11 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON(
const ParsedQuery& query, const QueryExecutionTree& qet,
const ad_utility::Timer& requestTimer, uint64_t maxSend,
CancellationHandle cancellationHandle) {
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
auto timeResultComputation = requestTimer.msecs();

size_t resultSize = resultTable->size();
size_t resultSize = resultTable->idTable().size();

nlohmann::json j;

Expand Down Expand Up @@ -725,7 +723,7 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON(
AD_THROW(
"SPARQL-compliant JSON format is only supported for SELECT queries");
}
shared_ptr<const ResultTable> resultTable = qet.getResult();
std::shared_ptr<const Result> resultTable = qet.getResult();
resultTable->logResultSize();
nlohmann::json j;
auto limitAndOffset = query._limitOffset;
Expand Down
13 changes: 6 additions & 7 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class ExportQueryExecutionTrees {
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

/**
Expand All @@ -132,31 +132,30 @@ class ExportQueryExecutionTrees {
static nlohmann::json idTableToQLeverJSONArray(
const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
const QueryExecutionTree::ColumnIndicesAndTypes& columns,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
static nlohmann::json constructQueryResultBindingsToQLeverJSON(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
const LimitOffsetClause& limitAndOffset,
std::shared_ptr<const ResultTable> res,
CancellationHandle cancellationHandle);
std::shared_ptr<const Result> res, CancellationHandle cancellationHandle);

// Generate an RDF graph for a CONSTRUCT query.
static cppcoro::generator<QueryExecutionTree::StringTriple>
constructQueryResultToTriples(
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset, std::shared_ptr<const ResultTable> res,
LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
static nlohmann::json selectQueryResultToSparqlJSON(
const QueryExecutionTree& qet,
const parsedQuery::SelectClause& selectClause,
const LimitOffsetClause& limitAndOffset,
shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// ___________________________________________________________________________
Expand All @@ -165,7 +164,7 @@ class ExportQueryExecutionTrees {
const QueryExecutionTree& qet,
const ad_utility::sparql_types::Triples& constructTriples,
LimitOffsetClause limitAndOffset,
std::shared_ptr<const ResultTable> resultTable,
std::shared_ptr<const Result> resultTable,
CancellationHandle cancellationHandle);

// _____________________________________________________________________________
Expand Down
6 changes: 3 additions & 3 deletions src/engine/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ string Filter::getDescriptor() const {
}

// _____________________________________________________________________________
ResultTable Filter::computeResult() {
Result Filter::computeResult([[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl;
shared_ptr<const ResultTable> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = _subtree->getResult();
LOG(DEBUG) << "Filter result computation..." << endl;
checkCancellation();

Expand All @@ -63,7 +63,7 @@ ResultTable Filter::computeResult() {
// _____________________________________________________________________________
template <size_t WIDTH>
void Filter::computeFilterImpl(IdTable* outputIdTable,
const ResultTable& inputResultTable) {
const Result& inputResultTable) {
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(),
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ class Filter : public Operation {
return _subtree->getVariableColumns();
}

ResultTable computeResult() override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

template <size_t WIDTH>
void computeFilterImpl(IdTable* outputIdTable,
const ResultTable& inputResultTable);
const Result& inputResultTable);
};
8 changes: 4 additions & 4 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ void GroupBy::processGroup(
* @param blockEnd Where the group ends.
* @param input The input Table.
* @param result
* @param inTable The input ResultTable, which is required for its local
* @param inTable The input Result, which is required for its local
* vocabulary
* @param outTable The output ResultTable, the vocabulary of which needs to be
* @param outTable The output Result, the vocabulary of which needs to be
* expanded for GROUP_CONCAT aggregates
* @param distinctHashSet An empty hash set. This is only passed in as an
* argument to allow for efficient reusage of its
Expand Down Expand Up @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput,
*dynResult = std::move(result).toDynamic();
}

ResultTable GroupBy::computeResult() {
Result GroupBy::computeResult([[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "GroupBy result computation..." << std::endl;

IdTable idTable{getExecutionContext()->getAllocator()};
Expand All @@ -335,7 +335,7 @@ ResultTable GroupBy::computeResult() {
auto hashMapOptimizationParams =
checkIfHashMapOptimizationPossible(aggregates);

std::shared_ptr<const ResultTable> subresult;
std::shared_ptr<const Result> subresult;
if (hashMapOptimizationParams.has_value()) {
const auto* child = _subtree->getRootOperation()->getChildren().at(0);
// Skip sorting
Expand Down
Loading
Loading