diff --git a/e2e/queryit.py b/e2e/queryit.py index c9189a44f8..edda0eca32 100755 --- a/e2e/queryit.py +++ b/e2e/queryit.py @@ -24,6 +24,8 @@ class ErrorReportingHandler(urllib.request.BaseHandler): def http_error_default(self, req, fp, code, msg, hdrs): if code != 200: eprint(f"Status {code} for {req.full_url}") + eprint(f"Message {msg} headers {hdrs}") + eprint(fp.read()) return fp @@ -228,6 +230,9 @@ def test_check(check_dict: Dict[str, Any], result: Dict[str, Any]) -> bool: if not found: eprint("contains_row check failed:\n" + "\tdid not find %r" % gold_row) + print("Printing the first 10 rows of the result:") + for row in res[:10]: + print(row) return False elif check == 'contains_warning': for requested_warning in value: diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 0a89ea4d51..de290c199c 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1425,4 +1425,45 @@ queries: - contains_row: [ 3.0, 2.5] - contains_row: [ 4.0, 2.2] + - query: aggregate-in-having-clause + type: no-text + sparql: | + SELECT ?x WHERE { + VALUES (?x ?y) { (0.5 1.5) (0.5 3.5) (1.2 2.8)} + } GROUP BY ?x + HAVING (?x + COUNT(?y) > 2.3) + checks: + - num_cols: 1 + - num_rows : 1 + - selected: [ "?x" ] + - contains_row: [ 0.5 ] + + - query: aggregate-in-order-by + type: no-text + sparql: | + SELECT (AVG(?x + ?y) as ?avg) WHERE { + VALUES (?x ?y) { (0.5 1.5) (0.5 3.5) (1.2 2.8)} + } GROUP BY ?x + ORDER BY (- SUM(?y)) + checks: + - num_cols: 1 + - num_rows : 2 + - selected: [ "?avg" ] + - contains_row: [ 3.0 ] + - contains_row: [ 4.0 ] + + - query: variable-from-select-clause-reusage + type: no-text + sparql: | + SELECT (AVG(?x + ?y) as ?avg) (?avg + COUNT(?y) * 0.5 AS ?b) WHERE { + VALUES (?x ?y) { (0.5 1.5) (0.5 3.5) (1.2 2.8)} + } GROUP BY ?x + checks: + - num_cols: 2 + - num_rows : 2 + - selected: [ "?avg", "?b" ] + - contains_row: [ 3.0, 4.0 ] + - contains_row: [ 4.0, 4.5 ] + + diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index ffb39a33f5..7d7073becc 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -26,11 +26,11 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector groupByVariables, : Operation{qec}, _groupByVariables{std::move(groupByVariables)}, _aliases{std::move(aliases)} { - // sort the aliases and groupByVariables to ensure the cache key is order + // Sort the groupByVariables to ensure that the cache key is order // invariant. - std::ranges::sort(_aliases, std::less<>{}, - [](const Alias& a) { return a._target.name(); }); - + // Note: It is tempting to do the same also for the aliases, but that would + // break the case when an alias reuses a variable that was bound by a previous + // alias. std::ranges::sort(_groupByVariables, std::less<>{}, &Variable::name); auto sortColumns = computeSortColumns(subtree.get()); @@ -39,8 +39,22 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector groupByVariables, } string GroupBy::asStringImpl(size_t indent) const { - const auto varMap = getInternallyVisibleVariableColumns(); - const auto varMapInput = _subtree->getVariableColumns(); + const auto& varMap = getInternallyVisibleVariableColumns(); + auto varMapInput = _subtree->getVariableColumns(); + + // We also have to encode the variables to which alias results are stored in + // the cache key of the expressions in case they reuse a variable from the + // previous result. + auto numColumnsInput = _subtree->getResultWidth(); + for (const auto& [var, column] : varMap) { + if (!varMapInput.contains(var)) { + // It is important that the cache keys for the variables from the aliases + // do not collide with the query body, and that they are consistent. The + // constant `1000` has no deeper meaning but makes debugging easier. + varMapInput[var] = column + 1000 + numColumnsInput; + } + } + std::ostringstream os; for (size_t i = 0; i < indent; ++i) { os << " "; @@ -166,6 +180,11 @@ void GroupBy::processGroup( auto& resultEntry = result->operator()(resultRow, resultColumn); + // Copy the result to the evaluation context in case one of the following + // aliases has to reuse it. + evaluationContext._previousResultsFromSameGroup.at(resultColumn) = + sparqlExpression::copyExpressionResultIfNotVector(expressionResult); + auto visitor = [&]( T&& singleResult) mutable { constexpr static bool isStrongId = std::is_same_v; @@ -227,8 +246,15 @@ void GroupBy::doGroupBy(const IdTable& dynInput, *getExecutionContext(), columnMap, inTable->_idTable, getExecutionContext()->getAllocator(), outTable->localVocabNonConst()); + // In a GROUP BY evaluation, the expressions need to know which variables are + // grouped, and to which columns the results of the aliases are written. The + // latter information is needed if the expression of an reuses the result + // variable from a previous alias as an input. evaluationContext._groupedVariables = ad_utility::HashSet{ _groupByVariables.begin(), _groupByVariables.end()}; + evaluationContext._variableToColumnMapPreviousResults = + getInternallyVisibleVariableColumns(); + evaluationContext._previousResultsFromSameGroup.resize(getResultWidth()); auto processNextBlock = [&](size_t blockStart, size_t blockEnd) { result.emplace_back(); diff --git a/src/engine/sparqlExpressions/AggregateExpression.cpp b/src/engine/sparqlExpressions/AggregateExpression.cpp index e2487c08a0..d8a50ee911 100644 --- a/src/engine/sparqlExpressions/AggregateExpression.cpp +++ b/src/engine/sparqlExpressions/AggregateExpression.cpp @@ -40,7 +40,7 @@ AggregateExpression::children() { // _________________________________________________________________________ template -vector AggregateExpression< +vector AggregateExpression< AggregateOperation, FinalOperation>::getUnaggregatedVariables() { // This is an aggregate, so it never leaves any unaggregated variables. return {}; diff --git a/src/engine/sparqlExpressions/AggregateExpression.h b/src/engine/sparqlExpressions/AggregateExpression.h index 21061b8d4c..1c2fe0e275 100644 --- a/src/engine/sparqlExpressions/AggregateExpression.h +++ b/src/engine/sparqlExpressions/AggregateExpression.h @@ -35,7 +35,7 @@ class AggregateExpression : public SparqlExpression { std::span children() override; // _________________________________________________________________________ - vector getUnaggregatedVariables() override; + vector getUnaggregatedVariables() override; // An `AggregateExpression` (obviously) contains an aggregate. bool containsAggregate() const override { return true; } diff --git a/src/engine/sparqlExpressions/GroupConcatExpression.h b/src/engine/sparqlExpressions/GroupConcatExpression.h index b36083c3d3..8d92c1c53e 100644 --- a/src/engine/sparqlExpressions/GroupConcatExpression.h +++ b/src/engine/sparqlExpressions/GroupConcatExpression.h @@ -52,7 +52,7 @@ class GroupConcatExpression : public SparqlExpression { // A `GroupConcatExpression` is an aggregate, so it never leaves any // unaggregated variables. - vector getUnaggregatedVariables() override { return {}; } + vector getUnaggregatedVariables() override { return {}; } // A `GroupConcatExpression` is an aggregate. bool containsAggregate() const override { return true; } diff --git a/src/engine/sparqlExpressions/LiteralExpression.h b/src/engine/sparqlExpressions/LiteralExpression.h index fd2493c4b1..12e47f4ead 100644 --- a/src/engine/sparqlExpressions/LiteralExpression.h +++ b/src/engine/sparqlExpressions/LiteralExpression.h @@ -2,8 +2,7 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach -#ifndef QLEVER_LITERALEXPRESSION_H -#define QLEVER_LITERALEXPRESSION_H +#pragma once #include "engine/sparqlExpressions/SparqlExpression.h" @@ -40,22 +39,7 @@ class LiteralExpression : public SparqlExpression { } else if constexpr (std::is_same_v) { return getIdOrString(_value); } else if constexpr (std::is_same_v) { - // If a variable is grouped, then we know that it always has the same - // value and can treat it as a constant. This is not possible however when - // we are inside an aggregate, because for example `SUM(?variable)` must - // still compute the sum over the whole group. - if (context->_groupedVariables.contains(_value) && !isInsideAggregate()) { - auto column = context->getColumnIndexForVariable(_value); - const auto& table = context->_inputTable; - auto constantValue = table.at(context->_beginIndex, column); - assert((std::ranges::all_of( - table.begin() + context->_beginIndex, - table.begin() + context->_endIndex, - [&](const auto& row) { return row[column] == constantValue; }))); - return constantValue; - } else { - return _value; - } + return evaluateIfVariable(context, _value); } else { return _value; } @@ -74,9 +58,9 @@ class LiteralExpression : public SparqlExpression { } // _________________________________________________________________________ - vector getUnaggregatedVariables() override { + vector getUnaggregatedVariables() override { if constexpr (std::is_same_v) { - return {_value.name()}; + return {_value}; } else { return {}; } @@ -116,6 +100,47 @@ class LiteralExpression : public SparqlExpression { private: T _value; + + // Evaluate the expression if it is a variable expression with the given + // `variable`. The variable is passed in explicitly because this function + // might be called recursively. + ExpressionResult evaluateIfVariable(EvaluationContext* context, + const Variable& variable) const { + // If this is a variable that is not visible in the input but was bound by a + // previous alias in the same SELECT clause, then read the constant value of + // the variable from the data structures dedicated to this case. + if (auto resultFromSameRow = + context->getResultFromPreviousAggregate(variable); + resultFromSameRow.has_value() && + !context->_groupedVariables.contains(variable)) { + // If the expression is a simple renaming of a variable `(?x AS ?y)` we + // have to recurse to track a possible chain of such renamings in the + // SELECT clause. + if (const Variable* var = + std::get_if(&resultFromSameRow.value()); + var != nullptr) { + return evaluateIfVariable(context, *var); + } else { + return std::move(resultFromSameRow.value()); + } + } + // If a variable is grouped, then we know that it always has the same + // value and can treat it as a constant. This is not possible however when + // we are inside an aggregate, because for example `SUM(?variable)` must + // still compute the sum over the whole group. + if (context->_groupedVariables.contains(variable) && !isInsideAggregate()) { + auto column = context->getColumnIndexForVariable(variable); + const auto& table = context->_inputTable; + auto constantValue = table.at(context->_beginIndex, column); + assert((std::ranges::all_of( + table.begin() + context->_beginIndex, + table.begin() + context->_endIndex, + [&](const auto& row) { return row[column] == constantValue; }))); + return constantValue; + } else { + return variable; + } + } }; } // namespace detail @@ -129,5 +154,3 @@ using StringLiteralExpression = detail::LiteralExpression; using IdExpression = detail::LiteralExpression; } // namespace sparqlExpression - -#endif // QLEVER_LITERALEXPRESSION_H diff --git a/src/engine/sparqlExpressions/SampleExpression.cpp b/src/engine/sparqlExpressions/SampleExpression.cpp index 85eeb48452..79944e49d7 100644 --- a/src/engine/sparqlExpressions/SampleExpression.cpp +++ b/src/engine/sparqlExpressions/SampleExpression.cpp @@ -25,12 +25,9 @@ ExpressionResult SampleExpression::evaluate(EvaluationContext* context) const { } else if constexpr (std::is_same_v) { // TODO Can't this be a simpler function (getIdAt) AD_CONTRACT_CHECK(context->_endIndex > context->_beginIndex); - EvaluationContext contextForSingleValue = *context; - contextForSingleValue._endIndex = contextForSingleValue._beginIndex + 1; - auto idOfFirstAsVector = - detail::getIdsFromVariable(childResult, &contextForSingleValue); - return idOfFirstAsVector[0]; - ; + std::span idOfFirstAsVector = detail::getIdsFromVariable( + childResult, context, context->_beginIndex, context->_endIndex); + return ExpressionResult{idOfFirstAsVector[0]}; } else { static_assert(isConstantResult); return childResult; diff --git a/src/engine/sparqlExpressions/SampleExpression.h b/src/engine/sparqlExpressions/SampleExpression.h index a6e91dd7f8..a0e6ae7525 100644 --- a/src/engine/sparqlExpressions/SampleExpression.h +++ b/src/engine/sparqlExpressions/SampleExpression.h @@ -16,13 +16,15 @@ namespace sparqlExpression { class SampleExpression : public SparqlExpression { public: SampleExpression([[maybe_unused]] bool distinct, Ptr&& child) - : _child{std::move(child)} {} + : _child{std::move(child)} { + setIsInsideAggregate(); + } // __________________________________________________________________________ ExpressionResult evaluate(EvaluationContext* context) const override; // _____________________________________________________________________ - vector getUnaggregatedVariables() override { + vector getUnaggregatedVariables() override { // This is an aggregation, so it never leaves any unaggregated variables. return {}; } diff --git a/src/engine/sparqlExpressions/SparqlExpression.h b/src/engine/sparqlExpressions/SparqlExpression.h index 5fe802d0ec..55ea61c94b 100644 --- a/src/engine/sparqlExpressions/SparqlExpression.h +++ b/src/engine/sparqlExpressions/SparqlExpression.h @@ -62,10 +62,10 @@ class SparqlExpression { /// Return all the variables that occur in the expression, but are not /// aggregated. - virtual vector getUnaggregatedVariables() { + virtual vector getUnaggregatedVariables() { // Default implementation: This expression adds no variables, but all // unaggregated variables from the children remain unaggregated. - std::vector result; + std::vector result; for (const auto& child : children()) { auto childResult = child->getUnaggregatedVariables(); result.insert(result.end(), std::make_move_iterator(childResult.begin()), diff --git a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h index ad2359fd1f..12e844c588 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h +++ b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h @@ -13,17 +13,12 @@ namespace sparqlExpression::detail { -// Internal implementation of `getIdsFromVariable` (see below). -// It is required because of the `CALL_FIXED_SIZE` mechanism for the `IdTable`s. -template -void getIdsFromVariableImpl(VectorWithMemoryLimit& result, - const ::Variable& variable, - EvaluationContext* context) { - AD_CONTRACT_CHECK(result.empty()); - const auto inputTable = context->_inputTable.asStaticView(); - - const size_t beginIndex = context->_beginIndex; - const size_t endIndex = context->_endIndex; +/// Convert a variable to a vector of all the Ids it is bound to in the +/// `context`. +inline std::span getIdsFromVariable( + const ::Variable& variable, const EvaluationContext* context, + size_t beginIndex, size_t endIndex) { + const auto& inputTable = context->_inputTable; if (!context->_variableToColumnAndResultTypeMap.contains(variable)) { throw std::runtime_error( @@ -34,22 +29,20 @@ void getIdsFromVariableImpl(VectorWithMemoryLimit& result, const size_t columnIndex = context->_variableToColumnAndResultTypeMap.at(variable).first; - result.reserve(endIndex - beginIndex); - for (size_t i = beginIndex; i < endIndex; ++i) { - result.push_back(ValueId{inputTable(i, columnIndex)}); - } + std::span completeColumn = inputTable.getColumn(columnIndex); + + AD_CONTRACT_CHECK(beginIndex <= endIndex && + endIndex <= completeColumn.size()); + return {completeColumn.begin() + beginIndex, + completeColumn.begin() + endIndex}; } -/// Convert a variable to a vector of all the Ids it is bound to in the -/// `context`. -// TODO Restructure QLever to column based design, then this will -// become a noop; -inline VectorWithMemoryLimit getIdsFromVariable( - const ::Variable& variable, EvaluationContext* context) { - auto cols = context->_inputTable.numColumns(); - VectorWithMemoryLimit result{context->_allocator}; - CALL_FIXED_SIZE(cols, &getIdsFromVariableImpl, result, variable, context); - return result; +// Overload that reads the `beginIndex` and the `endIndex` directly from the +// `context +inline std::span getIdsFromVariable( + const ::Variable& variable, const EvaluationContext* context) { + return getIdsFromVariable(variable, context, context->_beginIndex, + context->_endIndex); } /// Generators that yield `numItems` items for the various @@ -62,12 +55,14 @@ requires isConstantResult cppcoro::generator resultGenerator( } } -template -requires isVectorResult cppcoro::generator -resultGenerator(T vector, size_t numItems) { +template +requires isVectorResult +auto resultGenerator(T vector, size_t numItems) + -> cppcoro::generator> { AD_CONTRACT_CHECK(numItems == vector.size()); for (auto& element : vector) { - co_yield std::move(element); + auto cpy = std::move(element); + co_yield cpy; } } @@ -94,10 +89,9 @@ inline cppcoro::generator resultGenerator(ad_utility::SetOfIntervals set, template auto makeGenerator(Input&& input, size_t numItems, EvaluationContext* context) { if constexpr (ad_utility::isSimilar<::Variable, Input>) { - // TODO: Also directly write a generator that lazily gets the Ids in chunks. - StrongIdsWithResultType inputWithVariableResolved{ + std::span inputWithVariableResolved{ getIdsFromVariable(std::forward(input), context)}; - return resultGenerator(std::move(inputWithVariableResolved), numItems); + return resultGenerator(inputWithVariableResolved, numItems); } else { return resultGenerator(std::forward(input), numItems); } diff --git a/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp b/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp index 4ef05c2651..ed1e004bd0 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp @@ -4,6 +4,7 @@ #include "./SparqlExpressionPimpl.h" +#include "engine/sparqlExpressions/LiteralExpression.h" #include "engine/sparqlExpressions/SparqlExpression.h" namespace sparqlExpression { @@ -34,8 +35,8 @@ SparqlExpressionPimpl& SparqlExpressionPimpl::operator=( const SparqlExpressionPimpl&) = default; // ____________________________________________________________________________ -std::vector SparqlExpressionPimpl::getUnaggregatedVariables( - const ad_utility::HashSet& groupedVariables) const { +std::vector SparqlExpressionPimpl::getUnaggregatedVariables( + const ad_utility::HashSet& groupedVariables) const { auto vars = _pimpl->getUnaggregatedVariables(); std::erase_if( vars, [&](const auto& var) { return groupedVariables.contains(var); }); @@ -100,4 +101,10 @@ bool SparqlExpressionPimpl::containsLangExpression() const { bool SparqlExpressionPimpl::containsAggregate() const { return _pimpl->containsAggregate(); } + +// ______________________________________________________________________________ +SparqlExpressionPimpl SparqlExpressionPimpl::makeVariableExpression( + const Variable& variable) { + return {std::make_unique(variable), variable.name()}; +} } // namespace sparqlExpression diff --git a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h index 1d98ed25ab..c9b023c020 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h +++ b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h @@ -29,14 +29,14 @@ class SparqlExpressionPimpl { // Get the variables that are not aggregated by this expression. The variables // in the argument `groupedVariables` are deleted from the result (grouped // variables do not have to be aggregated). - [[nodiscard]] std::vector getUnaggregatedVariables( - const ad_utility::HashSet& groupedVariables = {}) const; + [[nodiscard]] std::vector getUnaggregatedVariables( + const ad_utility::HashSet& groupedVariables = {}) const; // Does this expression aggregate over all variables that are not in // `groupedVariables`. For example, COUNT() always returns true. // COUNT(?x) + ?m returns true if and only if ?m is in `groupedVariables`. [[nodiscard]] bool isAggregate( - const ad_utility::HashSet& groupedVariables) const { + const ad_utility::HashSet& groupedVariables) const { // TODO This can be std::ranges::all_of as soon as libc++ supports // it, or the combination of clang + libstdc++ + coroutines works. auto unaggregatedVariables = getUnaggregatedVariables(); @@ -116,6 +116,9 @@ class SparqlExpressionPimpl { return _pimpl.get(); } + // Create a `SparqlExpressionPimpl` from a single variable. + static SparqlExpressionPimpl makeVariableExpression(const Variable& variable); + private: // TODO Why can't this be a unique_ptr. std::shared_ptr _pimpl; diff --git a/src/engine/sparqlExpressions/SparqlExpressionTypes.h b/src/engine/sparqlExpressions/SparqlExpressionTypes.h index f126cff9eb..4f4278957e 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionTypes.h +++ b/src/engine/sparqlExpressions/SparqlExpressionTypes.h @@ -38,7 +38,6 @@ class VectorWithMemoryLimit public: VectorWithMemoryLimit& operator=(const VectorWithMemoryLimit&) = delete; - // Moving is fine. VectorWithMemoryLimit(VectorWithMemoryLimit&&) noexcept = default; VectorWithMemoryLimit& operator=(VectorWithMemoryLimit&&) noexcept = default; @@ -97,7 +96,67 @@ using StrongIdsWithResultType = VectorWithMemoryLimit; using VariableToColumnAndResultTypeMap = ad_utility::HashMap>; -/// All the additional information which is needed to evaluate a Sparql +/// The result of an expression can either be a vector of bool/double/int/string +/// a variable (e.g. in BIND (?x as ?y)) or a "Set" of indices, which identifies +/// the row indices in which a boolean expression evaluates to "true". Constant +/// results are represented by a vector with only one element. +namespace detail { +// For each type T in this tuple, T as well as VectorWithMemoryLimit are +// possible expression result types. +using ConstantTypes = std::tuple; +using ConstantTypesAsVector = + ad_utility::LiftedTuple; + +// Each type in this tuple also is a possible expression result type. +using OtherTypes = std::tuple; + +using AllTypesAsTuple = + ad_utility::TupleCat; +} // namespace detail + +/// An Expression result is a std::variant of all the different types from +/// the expressionResultDetail namespace (see above). +using ExpressionResult = ad_utility::TupleToVariant; + +/// Only the different types contained in the variant `ExpressionResult` (above) +/// match this concept. +template +concept SingleExpressionResult = + ad_utility::isTypeContainedIn; + +// If the `ExpressionResult` holds a value that is cheap to copy (a constant or +// a variable), return a copy. Else throw an exception the message of which +// implies that this is not a valid usage of this function. +inline ExpressionResult copyExpressionResultIfNotVector( + const ExpressionResult& result) { + auto copyIfCopyable = + [](const R& x) -> ExpressionResult { + if constexpr (std::is_copy_assignable_v && + std::is_copy_constructible_v) { + return x; + } else { + AD_THROW( + "Tried to copy an expression result that is a vector. This should " + "never happen, as this code should only be called for the results of " + "expressions in a GROUP BY clause which all should be aggregates. " + "Please report this."); + } + }; + return std::visit(copyIfCopyable, result); +} + +/// True iff T represents a constant. +template +constexpr static bool isConstantResult = + ad_utility::isTypeContainedIn; + +/// True iff T is one of the ConstantTypesAsVector +template +constexpr static bool isVectorResult = + ad_utility::isTypeContainedIn || + ad_utility::isSimilar>; + +/// All the additional information which is needed to evaluate a SPARQL /// expression. struct EvaluationContext { const QueryExecutionContext& _qec; @@ -129,6 +188,17 @@ struct EvaluationContext { // treated as constants. ad_utility::HashSet _groupedVariables; + // Only needed during GROUP BY evaluation. + // Stores information about the results from previous expressions of the same + // SELECT clause line that might be accessed in the same SELECT clause. + + // This map maps variables that are bound in the select clause to indices. + VariableToColumnMap _variableToColumnMapPreviousResults; + // This vector contains the last result of the expressions in the SELECT + // clause. The correct index for a given variable is obtained from the + // `_variableToColumnMapPreviousResults`. + std::vector _previousResultsFromSameGroup; + /// Constructor for evaluating an expression on the complete input. EvaluationContext( const QueryExecutionContext& qec, @@ -181,45 +251,21 @@ struct EvaluationContext { } return map.at(var).first; } -}; - -/// The result of an expression can either be a vector of bool/double/int/string -/// a variable (e.g. in BIND (?x as ?y)) or a "Set" of indices, which identifies -/// the row indices in which a boolean expression evaluates to "true". Constant -/// results are represented by a vector with only one element. -namespace detail { -// For each type T in this tuple, T as well as VectorWithMemoryLimit are -// possible expression result types. -using ConstantTypes = std::tuple; -using ConstantTypesAsVector = - ad_utility::LiftedTuple; - -// Each type in this tuple also is a possible expression result type. -using OtherTypes = std::tuple; -using AllTypesAsTuple = - ad_utility::TupleCat; -} // namespace detail - -/// An Expression result is a std::variant of all the different types from -/// the expressionResultDetail namespace (see above). -using ExpressionResult = ad_utility::TupleToVariant; - -/// Only the different types contained in the variant `ExpressionResult` (above) -/// match this concept. -template -concept SingleExpressionResult = - ad_utility::isTypeContainedIn; - -/// True iff T represents a constant. -template -constexpr static bool isConstantResult = - ad_utility::isTypeContainedIn; + // _____________________________________________________________________________ + std::optional getResultFromPreviousAggregate( + const Variable& var) const { + const auto& map = _variableToColumnMapPreviousResults; + if (!map.contains(var)) { + return std::nullopt; + } + auto idx = map.at(var); + AD_CONTRACT_CHECK(idx < _previousResultsFromSameGroup.size()); -/// True iff T is one of the ConstantTypesAsVector -template -constexpr static bool isVectorResult = - ad_utility::isTypeContainedIn; + return copyExpressionResultIfNotVector( + _previousResultsFromSameGroup.at(idx)); + } +}; namespace detail { diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index 27a1bd86a1..2424bce916 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -4,17 +4,18 @@ #include "ParsedQuery.h" -#include -#include -#include -#include - #include #include #include #include #include +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "engine/sparqlExpressions/SparqlExpressionPimpl.h" +#include "parser/RdfEscaping.h" +#include "util/Conversions.h" + using std::string; using std::vector; @@ -105,9 +106,7 @@ Variable ParsedQuery::addInternalBind( sparqlExpression::SparqlExpressionPimpl expression) { // Internal variable name to which the result of the helper bind is // assigned. - auto targetVariable = Variable{INTERNAL_VARIABLE_PREFIX + - std::to_string(numInternalVariables_)}; - numInternalVariables_++; + auto targetVariable = getNewInternalVariable(); // Don't register the targetVariable as visible because it is used // internally and should not be selected by SELECT * (this is the `bool` // argument to `addBind`). @@ -118,6 +117,18 @@ Variable ParsedQuery::addInternalBind( return targetVariable; } +// ________________________________________________________________________ +Variable ParsedQuery::addInternalAlias( + sparqlExpression::SparqlExpressionPimpl expression) { + // Internal variable name to which the result of the helper bind is + // assigned. + auto targetVariable = getNewInternalVariable(); + // Don't register the targetVariable as visible because it is used + // internally and should not be visible to the user. + selectClause().addAlias(Alias{std::move(expression), targetVariable}, true); + return targetVariable; +} + // ________________________________________________________________________ void ParsedQuery::addBind(sparqlExpression::SparqlExpressionPimpl expression, Variable targetVariable, bool targetIsVisible) { @@ -130,64 +141,8 @@ void ParsedQuery::addBind(sparqlExpression::SparqlExpressionPimpl expression, // ________________________________________________________________________ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { - auto checkVariableIsVisible = [this](const Variable& var, - const std::string& locationDescription, - const ad_utility::HashSet& - additionalVisibleVariables = {}) { - if (!ad_utility::contains(getVisibleVariables(), var) && - !additionalVisibleVariables.contains(var)) { - throw InvalidQueryException("Variable " + var.name() + " was used by " + - locationDescription + - ", but is not defined in the query body."); - } - }; - auto checkUsedVariablesAreVisible = - [&checkVariableIsVisible]( - const sparqlExpression::SparqlExpressionPimpl& expression, - const std::string& locationDescription, - const ad_utility::HashSet& additionalVisibleVariables = - {}) { - for (const auto* var : expression.containedVariables()) { - checkVariableIsVisible(*var, - locationDescription + " in expression \"" + - expression.getDescriptor() + "\"", - additionalVisibleVariables); - } - }; - // Process groupClause - auto processVariable = [this, - &checkVariableIsVisible](const Variable& groupKey) { - checkVariableIsVisible(groupKey, "GROUP BY"); - - _groupByVariables.emplace_back(groupKey.name()); - }; - auto processExpression = - [this, &checkUsedVariablesAreVisible]( - sparqlExpression::SparqlExpressionPimpl groupKey) { - checkUsedVariablesAreVisible(groupKey, "GROUP BY"); - auto helperTarget = addInternalBind(std::move(groupKey)); - _groupByVariables.emplace_back(helperTarget.name()); - }; - auto processAlias = [this](Alias groupKey) { - parsedQuery::Bind helperBind{std::move(groupKey._expression), - groupKey._target}; - _rootGraphPattern._graphPatterns.emplace_back(std::move(helperBind)); - registerVariableVisibleInQueryBody(groupKey._target); - _groupByVariables.emplace_back(groupKey._target); - }; - - for (auto& orderKey : modifiers.groupByVariables_) { - std::visit( - ad_utility::OverloadCallOperator{processVariable, processExpression, - processAlias}, - std::move(orderKey)); - } - - // Process havingClause - // TODO as soon as FILTER and HAVING support proper - // expressions, also add similar sanity checks for the HAVING clause here. - _havingClauses = std::move(modifiers.havingClauses_); + addGroupByClause(std::move(modifiers.groupByVariables_)); const bool isExplicitGroupBy = !_groupByVariables.empty(); const bool isImplicitGroupBy = @@ -207,65 +162,12 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { "clause." + noteForImplicitGroupBy; - // Process orderClause - auto processVariableOrderKey = [this, &checkVariableIsVisible, isGroupBy, - ¬eForImplicitGroupBy]( - VariableOrderKey orderKey) { - // Check whether grouping is done. The variable being ordered by - // must then be either grouped or the result of an alias in the select. - const vector& groupByVariables = _groupByVariables; - - if (!isGroupBy) { - checkVariableIsVisible(orderKey.variable_, "ORDER BY"); - } else if (!ad_utility::contains(groupByVariables, orderKey.variable_) && - // `ConstructClause` has no Aliases. So the variable can never be - // the result of an Alias. - (hasConstructClause() || - !ad_utility::contains_if(selectClause().getAliases(), - [&orderKey](const Alias& alias) { - return alias._target == - orderKey.variable_; - }))) { - throw InvalidQueryException( - "Variable " + orderKey.variable_.name() + - " was used in an ORDER BY " - "clause, but is neither grouped, nor created as an alias in the " - "SELECT clause." + - noteForImplicitGroupBy); - } + // Process HAVING clause + addHavingClause(std::move(modifiers.havingClauses_), isGroupBy); - _orderBy.push_back(std::move(orderKey)); - }; - - // QLever currently only supports ordering by variables. To allow - // all `orderConditions`, the corresponding expression is bound to a new - // internal variable. Ordering is then done by this variable. - auto processExpressionOrderKey = [this, &checkUsedVariablesAreVisible, - isGroupBy, ¬eForImplicitGroupBy]( - ExpressionOrderKey orderKey) { - checkUsedVariablesAreVisible(orderKey.expression_, "ORDER BY"); - if (isGroupBy) { - // TODO Implement this by adding a hidden alias in the - // SELECT clause. - throw NotSupportedException( - "Ordering by an expression while grouping is not supported by " - "QLever. (The expression is \"" + - orderKey.expression_.getDescriptor() + - "\"). Please assign this expression to a " - "new variable in the SELECT clause and then order by this " - "variable." + - noteForImplicitGroupBy); - } - auto additionalVariable = addInternalBind(std::move(orderKey.expression_)); - _orderBy.emplace_back(additionalVariable, orderKey.isDescending_); - }; - - for (auto& orderKey : modifiers.orderBy_.orderKeys) { - std::visit(ad_utility::OverloadCallOperator{processVariableOrderKey, - processExpressionOrderKey}, - std::move(orderKey)); - } - _isInternalSort = modifiers.orderBy_.isInternalSort; + // Process ORDER BY clause + addOrderByClause(std::move(modifiers.orderBy_), isGroupBy, + noteForImplicitGroupBy); // Process limitOffsetClause _limitOffset = modifiers.limitOffset_; @@ -281,7 +183,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { if (ad_utility::contains(selectClause().getVisibleVariables(), alias._target)) { throw InvalidQueryException(absl::StrCat( - "The target", alias._target.name(), + "The target ", alias._target.name(), " of an AS clause was already used in the query body.")); } @@ -289,7 +191,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { // parsing the alias, thus it should appear exactly once if (variable_counts[alias._target] > 1) { throw InvalidQueryException(absl::StrCat( - "The target", alias._target.name(), + "The target ", alias._target.name(), " of an AS clause was already used before in the SELECT clause.")); } } @@ -301,44 +203,17 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { // Check that all the variables that are used in aliases are either visible // in the query body or are bound by a previous alias from the same SELECT // clause. - // Note: Currently the reusage of variables from previous aliases - // like SELECT (?a AS ?b) (?b AS ?c) is only supported by QLever if there is - // no GROUP BY in the query. To support this we would also need changes in - // the `GroupBy` class. - // TODO Implement these changes and support this case. ad_utility::HashSet variablesBoundInAliases; for (const auto& alias : selectClause().getAliases()) { - if (!isGroupBy) { - checkUsedVariablesAreVisible(alias._expression, "SELECT", - variablesBoundInAliases); - } else { - try { - checkUsedVariablesAreVisible(alias._expression, "SELECT", {}); - } catch (const InvalidQueryException& ex) { - // If the variable is neither defined in the query body nor in the - // select clause before, then the following call will throw the same - // exception that we have just caught. Else we are in the unsupported - // case and throw a more useful error message. - checkUsedVariablesAreVisible(alias._expression, "SELECT", - variablesBoundInAliases); - std::string_view note = - " Note: This variable was defined previously in the SELECT " - "clause, which is supported by the SPARQL standard, but " - "currently not supported by QLever when the query contains a " - "GROUP BY clause."; - throw NotSupportedException{ - absl::StrCat(ex.errorMessageWithoutPrefix(), note, - noteForGroupByError), - ex.metadata()}; - } - } + checkUsedVariablesAreVisible(alias._expression, "SELECT", + variablesBoundInAliases); variablesBoundInAliases.insert(alias._target); } if (isGroupBy) { - ad_utility::HashSet groupVariables{}; + ad_utility::HashSet groupVariables{}; for (const auto& variable : _groupByVariables) { - groupVariables.emplace(variable.toSparql()); + groupVariables.emplace(variable); } if (selectClause().isAsterisk()) { @@ -354,14 +229,19 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { if (auto it = std::ranges::find(aliases, var, &Alias::_target); it != aliases.end()) { const auto& alias = *it; - if (alias._expression.isAggregate(groupVariables)) { + auto relevantVariables = groupVariables; + for (auto i = aliases.begin(); i < it; ++i) { + relevantVariables.insert(i->_target); + } + if (alias._expression.isAggregate(relevantVariables)) { continue; } else { auto unaggregatedVars = alias._expression.getUnaggregatedVariables(groupVariables); throw InvalidQueryException(absl::StrCat( "The expression \"", alias._expression.getDescriptor(), - "\" does not aggregate ", absl::StrJoin(unaggregatedVars, ", "), + "\" does not aggregate ", + absl::StrJoin(unaggregatedVars, ", ", Variable::AbslFormatter), "." + noteForGroupByError)); } } @@ -447,7 +327,7 @@ void ParsedQuery::GraphPattern::toString(std::ostringstream& os, for (int j = 0; j < indentation; ++j) os << " "; os << _filters[i].asString() << ','; } - if (_filters.size() > 0) { + if (!_filters.empty()) { os << "\n"; for (int j = 0; j < indentation; ++j) os << " "; os << _filters.back().asString(); @@ -505,8 +385,7 @@ void ParsedQuery::GraphPattern::recomputeIds(size_t* id_count) { // __________________________________________________________________________ ParsedQuery::GraphPattern::GraphPattern() : _optional(false) {} -// TODO Change the first argument to `Variable`, but first merge -// the filter-PR. +// __________________________________________________________________________ void ParsedQuery::GraphPattern::addLanguageFilter( const Variable& variable, const std::string& languageInQuotes) { auto langTag = languageInQuotes.substr(1, languageInQuotes.size() - 2); @@ -591,3 +470,167 @@ ParsedQuery::getConstructedOrSelectedVariables() const { } // Nothing to yield in the CONSTRUCT case. } + +// ____________________________________________________________________________ +void ParsedQuery::checkVariableIsVisible( + const Variable& variable, const std::string& locationDescription, + const ad_utility::HashSet& additionalVisibleVariables, + std::string_view otherPossibleLocationDescription) const { + if (!ad_utility::contains(getVisibleVariables(), variable) && + !additionalVisibleVariables.contains(variable)) { + throw InvalidQueryException(absl::StrCat( + "Variable ", variable.name(), " was used by " + locationDescription, + ", but is not defined in the query body", + otherPossibleLocationDescription, ".")); + } +} + +// ____________________________________________________________________________ +void ParsedQuery::checkUsedVariablesAreVisible( + const sparqlExpression::SparqlExpressionPimpl& expression, + const std::string& locationDescription, + const ad_utility::HashSet& additionalVisibleVariables, + std::string_view otherPossibleLocationDescription) const { + for (const auto* var : expression.containedVariables()) { + checkVariableIsVisible(*var, + locationDescription + " in expression \"" + + expression.getDescriptor() + "\"", + additionalVisibleVariables, + otherPossibleLocationDescription); + } +} + +// ____________________________________________________________________________ +void ParsedQuery::addGroupByClause(std::vector groupKeys) { + // Process groupClause + auto processVariable = [this](const Variable& groupKey) { + checkVariableIsVisible(groupKey, "GROUP BY"); + + _groupByVariables.push_back(groupKey); + }; + + ad_utility::HashSet variablesDefinedInGroupBy; + auto processExpression = + [this, &variablesDefinedInGroupBy]( + sparqlExpression::SparqlExpressionPimpl groupKey) { + checkUsedVariablesAreVisible(groupKey, "GROUP BY", + variablesDefinedInGroupBy, + " or previously in the same GROUP BY"); + auto helperTarget = addInternalBind(std::move(groupKey)); + _groupByVariables.push_back(helperTarget); + }; + + auto processAlias = [this, &variablesDefinedInGroupBy](Alias groupKey) { + checkUsedVariablesAreVisible(groupKey._expression, "GROUP BY", + variablesDefinedInGroupBy, + " or previously in the same GROUP BY"); + variablesDefinedInGroupBy.insert(groupKey._target); + addBind(std::move(groupKey._expression), groupKey._target, true); + _groupByVariables.push_back(groupKey._target); + }; + + for (auto& groupByKey : groupKeys) { + std::visit( + ad_utility::OverloadCallOperator{processVariable, processExpression, + processAlias}, + std::move(groupByKey)); + } +} + +// ____________________________________________________________________________ +void ParsedQuery::addHavingClause(std::vector havingClauses, + bool isGroupBy) { + if (!isGroupBy && !havingClauses.empty()) { + throw InvalidQueryException( + "A HAVING clause is only supported in queries with GROUP BY"); + } + + ad_utility::HashSet variablesFromAliases; + if (hasSelectClause()) { + for (const auto& alias : selectClause().getAliases()) { + variablesFromAliases.insert(alias._target); + } + } + for (auto& havingClause : havingClauses) { + checkUsedVariablesAreVisible( + havingClause.expression_, "HAVING", variablesFromAliases, + " and also not bound inside the SELECT clause"); + auto newVariable = addInternalAlias(std::move(havingClause.expression_)); + // TODO simply use `emplace_back` to initialize the + // aggregate `SparqlFilter`. + _havingClauses.push_back(SparqlFilter{ + sparqlExpression::SparqlExpressionPimpl::makeVariableExpression( + newVariable)}); + } +} + +// ____________________________________________________________________________ +void ParsedQuery::addOrderByClause(OrderClause orderClause, bool isGroupBy, + std::string_view noteForImplicitGroupBy) { + // The variables that are used in the ORDER BY can also come from aliases in + // the SELECT clause + ad_utility::HashSet variablesFromAliases; + if (hasSelectClause()) { + for (const auto& alias : selectClause().getAliases()) { + variablesFromAliases.insert(alias._target); + } + } + + std::string_view additionalError = + " and also not bound inside the SELECT clause"; + + // Process orderClause + auto processVariableOrderKey = [this, isGroupBy, ¬eForImplicitGroupBy, + &variablesFromAliases, + additionalError](VariableOrderKey orderKey) { + checkVariableIsVisible(orderKey.variable_, "ORDER BY", variablesFromAliases, + additionalError); + + // Check whether grouping is done. The variable being ordered by + // must then be either grouped or the result of an alias in the select + // clause. + if (isGroupBy && + !ad_utility::contains(_groupByVariables, orderKey.variable_) && + (!variablesFromAliases.contains(orderKey.variable_))) { + throw InvalidQueryException( + "Variable " + orderKey.variable_.name() + + " was used in an ORDER BY clause, but is neither grouped nor " + "created as an alias in the SELECT clause." + + noteForImplicitGroupBy); + } + _orderBy.push_back(std::move(orderKey)); + }; + + // QLever currently only supports ordering by variables. To allow + // all `orderConditions`, the corresponding expression is bound to a new + // internal variable. Ordering is then done by this variable. + auto processExpressionOrderKey = + [this, isGroupBy, &variablesFromAliases, + additionalError](ExpressionOrderKey orderKey) { + checkUsedVariablesAreVisible(orderKey.expression_, "ORDER BY", + variablesFromAliases, additionalError); + if (isGroupBy) { + auto newVariable = addInternalAlias(std::move(orderKey.expression_)); + _orderBy.emplace_back(std::move(newVariable), orderKey.isDescending_); + } else { + auto additionalVariable = + addInternalBind(std::move(orderKey.expression_)); + _orderBy.emplace_back(additionalVariable, orderKey.isDescending_); + } + }; + + for (auto& orderKey : orderClause.orderKeys) { + std::visit(ad_utility::OverloadCallOperator{processVariableOrderKey, + processExpressionOrderKey}, + std::move(orderKey)); + } + _isInternalSort = orderClause.isInternalSort; +} + +// ________________________________________________________________ +Variable ParsedQuery::getNewInternalVariable() { + auto variable = + Variable{absl::StrCat(INTERNAL_VARIABLE_PREFIX, numInternalVariables_)}; + numInternalVariables_++; + return variable; +} diff --git a/src/parser/ParsedQuery.h b/src/parser/ParsedQuery.h index 451fa46908..f892de253f 100644 --- a/src/parser/ParsedQuery.h +++ b/src/parser/ParsedQuery.h @@ -176,7 +176,55 @@ class ParsedQuery { // is returned. Variable addInternalBind(sparqlExpression::SparqlExpressionPimpl expression); + // Add an internal AS clause to the SELECT clause that computes the given + // expression. This is needed by the `addSolutionModifiers` function to + // implement aggregating expressions in the ORDER BY and HAVING clauses of + // queries with a GROUP BY + Variable addInternalAlias(sparqlExpression::SparqlExpressionPimpl expression); + + // If the `variable` is neither visible in the query body nor contained in the + // `additionalVisibleVariables`, throw an `InvalidQueryException` that uses + // the `locationDescription` inside the message. + void checkVariableIsVisible( + const Variable& variable, const std::string& locationDescription, + const ad_utility::HashSet& additionalVisibleVariables = {}, + std::string_view otherPossibleLocationDescription = "") const; + + // Similar to `checkVariableIsVisible` above, but performs the check for each + // of the variables that are used inside the `expression`. + void checkUsedVariablesAreVisible( + const sparqlExpression::SparqlExpressionPimpl& expression, + const std::string& locationDescription, + const ad_utility::HashSet& additionalVisibleVariables = {}, + std::string_view otherPossibleLocationDescription = "") const; + + // Add the `groupKeys` (either variables or expressions) to the query and + // check whether all the variables are visible inside the query body. + void addGroupByClause(std::vector groupKeys); + + // Add the `havingClause` to the query. The argument `isGroupBy` denotes + // whether the query performs a GROUP BY. If it is set to false, then an + // exception is thrown (HAVING without GROUP BY is not allowed). The function + // also throws if one of the variables that is used in the `havingClause` is + // neither grouped nor aggregate by the expression it is contained in. + void addHavingClause(std::vector havingClause, bool isGroupBy); + + // Add the `orderClause` to the query. Throw an exception if the `orderClause` + // is not valid. This might happen if it uses variables that are not visible + // or (in case of a GROUP BY) not grouped or aggregated. + void addOrderByClause(OrderClause orderClause, bool isGroupBy, + std::string_view noteForImplicitGroupBy); + + // Return the next internal variable. Used e.g. by `addInternalBind` and + // `addInternalAlias` + Variable getNewInternalVariable(); + public: + // Add the `modifiers` (like GROUP BY, HAVING, ORDER BY) to the query. Throw + // an `InvalidQueryException` if the modifiers are invalid. This might happen + // if one of the modifiers uses a variable that is either not visible in the + // query before it is used, or if it uses a variable that is not properly + // grouped or aggregated in the presence of a GROUP BY clause. void addSolutionModifiers(SolutionModifiers modifiers); /** diff --git a/src/parser/SelectClause.cpp b/src/parser/SelectClause.cpp index c1ceb5eb40..bb32be05e0 100644 --- a/src/parser/SelectClause.cpp +++ b/src/parser/SelectClause.cpp @@ -33,20 +33,30 @@ void SelectClause::setAsterisk() { varsAndAliasesOrAsterisk_ = Asterisk{}; } // ____________________________________________________________________ void SelectClause::setSelected(std::vector varsOrAliases) { - VarsAndAliases v; - auto processVariable = [&v](Variable var) { + varsAndAliasesOrAsterisk_ = VarsAndAliases{}; + for (auto& el : varsOrAliases) { + // The second argument means that the variables are not internal. + addAlias(std::move(el), false); + } +} + +// ____________________________________________________________________________ +void SelectClause::addAlias(parsedQuery::SelectClause::VarOrAlias varOrAlias, + bool isInternal) { + AD_CORRECTNESS_CHECK(!isAsterisk()); + auto& v = std::get(varsAndAliasesOrAsterisk_); + auto processVariable = [&v, isInternal](Variable var) { + AD_CONTRACT_CHECK(!isInternal); v.vars_.push_back(std::move(var)); }; - auto processAlias = [&v](Alias alias) { - v.vars_.emplace_back(alias._target); + auto processAlias = [&v, isInternal](Alias alias) { + if (!isInternal) { + v.vars_.emplace_back(alias._target); + } v.aliases_.push_back(std::move(alias)); }; - - for (auto& el : varsOrAliases) { - std::visit(ad_utility::OverloadCallOperator{processVariable, processAlias}, - std::move(el)); - } - varsAndAliasesOrAsterisk_ = std::move(v); + std::visit(ad_utility::OverloadCallOperator{processVariable, processAlias}, + std::move(varOrAlias)); } // ____________________________________________________________________ diff --git a/src/parser/SelectClause.h b/src/parser/SelectClause.h index f74fe2b014..2ac0da82af 100644 --- a/src/parser/SelectClause.h +++ b/src/parser/SelectClause.h @@ -59,6 +59,7 @@ struct SelectClause : ClauseBase { // `setSelected`. using VarOrAlias = std::variant; void setSelected(std::vector varsOrAliases); + void addAlias(VarOrAlias varOrAlias, bool isInternal); // Overload of `setSelected` (see above) for the simple case, where only // variables and no aliases are selected. diff --git a/src/util/Generator.h b/src/util/Generator.h index feab2999e6..15b46c79cf 100644 --- a/src/util/Generator.h +++ b/src/util/Generator.h @@ -23,9 +23,12 @@ namespace detail { template class generator_promise { public: - using value_type = std::remove_reference_t; + // Even if the generator only yields `const` values, the `value_type` + // shouldn't be `const` because otherwise several static checks when + // interacting with the STL fail. + using value_type = std::remove_cvref_t; using reference_type = std::conditional_t, T, T&>; - using pointer_type = value_type*; + using pointer_type = std::remove_reference_t*; generator_promise() = default; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 20abd82e99..8d21761127 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -306,3 +306,5 @@ addLinkAndDiscoverTest(OnDestructionDontThrowDuringStackUnwindingTest) addLinkAndDiscoverTest(ExceptionHandlingTest) +addLinkAndDiscoverTest(SparqlExpressionTypesTest) + diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index e730c012b6..5c12fc6475 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -10,12 +10,14 @@ #include "engine/GroupBy.h" #include "engine/IndexScan.h" #include "engine/Join.h" +#include "engine/QueryPlanner.h" #include "engine/Values.h" #include "engine/sparqlExpressions/AggregateExpression.h" #include "engine/sparqlExpressions/LiteralExpression.h" #include "engine/sparqlExpressions/NaryExpression.h" #include "gtest/gtest.h" #include "index/ConstantsIndexBuilding.h" +#include "parser/SparqlParser.h" using namespace ad_utility::testing; @@ -742,4 +744,92 @@ TEST(GroupBy, GroupedVariableInExpressions) { EXPECT_EQ(table, expected); } +TEST(GroupBy, AliasResultReused) { + parsedQuery::SparqlValues input; + using TC = TripleComponent; + // Test the following SPARQL query: + // + // SELECT (AVG(?a + ?b) as ?x) (?x + COUNT(?b) AS ?y) WHERE { + // VALUES (?a ?b) { (1.0 3.0) (1.0 7.0) (5.0 4.0)} + // } GROUP BY ?a + // + // Note: The values are chosen such that the results are all integers. + // Otherwise we would get into trouble with floating point comparisons. A + // check with a similar query but with non-integral inputs and results can be + // found in the E2E tests. + + Variable varA = Variable{"?a"}; + Variable varB = Variable{"?b"}; + + input._variables = std::vector{varA, varB}; + input._values.push_back(std::vector{TC(1.0), TC(3.0)}); + input._values.push_back(std::vector{TC(1.0), TC(7.0)}); + input._values.push_back(std::vector{TC(5.0), TC(4.0)}); + auto values = ad_utility::makeExecutionTree( + ad_utility::testing::getQec(), input); + + using namespace sparqlExpression; + + // Create `Alias` object for `(AVG(?a + ?b) AS ?x)`. + auto sum = make(make(varA), + make(varB)); + auto avg = make(false, std::move(sum)); + auto alias1 = Alias{SparqlExpressionPimpl{std::move(avg), "avg(?a + ?b"}, + Variable{"?x"}}; + + // Create `Alias` object for `(?a + COUNT(?b) AS ?y)`. + auto expr2 = make( + make(Variable{"?x"}), + make(false, make(varB))); + auto alias2 = Alias{SparqlExpressionPimpl{std::move(expr2), "?x + COUNT(?b)"}, + Variable{"?y"}}; + + // Set up and evaluate the GROUP BY clause. + GroupBy groupBy{ad_utility::testing::getQec(), + {Variable{"?a"}}, + {std::move(alias1), std::move(alias2)}, + std::move(values)}; + auto result = groupBy.getResult(); + const auto& table = result->_idTable; + + // Check the result. + auto d = DoubleId; + VariableToColumnMap expectedVariables{ + {Variable{"?a"}, 0}, {Variable{"?x"}, 1}, {Variable{"?y"}, 2}}; + EXPECT_THAT(groupBy.getExternallyVisibleVariableColumns(), + ::testing::UnorderedElementsAreArray(expectedVariables)); + auto expected = + makeIdTableFromIdVector({{d(1), d(6), d(8)}, {d(5), d(9), d(10)}}); + EXPECT_EQ(table, expected); +} + } // namespace + +// Expressions in HAVING clauses are converted to special internal aliases. Test +// the combination of parsing and evaluating such queries. +TEST(GroupBy, AddedHavingRows) { + auto query = + "SELECT ?x (COUNT(?y) as ?count) WHERE {" + " VALUES (?x ?y) {(0 1) (0 3) (0 5) (1 4) (1 3) } }" + "GROUP BY ?x HAVING (?count > 2)"; + auto pq = SparqlParser::parseQuery(query); + QueryPlanner qp{ad_utility::testing::getQec()}; + auto tree = qp.createExecutionTree(pq); + + auto res = tree.getResult(); + + // The HAVING is implemented as an alias that creates an internal variable + // which becomes part of the result, but is not selected by the query. + EXPECT_THAT(pq.selectClause().getSelectedVariables(), + ::testing::ElementsAre(Variable{"?x"}, Variable{"?count"})); + VariableToColumnMap expectedVariables{ + {Variable{"?x"}, 0}, + {Variable{"?count"}, 1}, + {Variable{"?_QLever_internal_variable_0"}, 2}}; + EXPECT_THAT(tree.getVariableColumns(), + ::testing::UnorderedElementsAreArray(expectedVariables)); + const auto& table = res->_idTable; + auto i = IntId; + auto expected = makeIdTableFromIdVector({{i(0), i(3), i(1)}}); + EXPECT_EQ(table, expected); +} diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 0f174512ec..345239eb6e 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -94,7 +94,7 @@ struct ExpectCompleteParse { }; }; -template +template struct ExpectParseFails { SparqlQleverVisitor::PrefixMap prefixMap_ = {}; SparqlQleverVisitor::DisableSomeChecksOnlyForTesting disableSomeChecks = @@ -102,17 +102,19 @@ struct ExpectParseFails { auto operator()( const string& input, + const testing::Matcher& messageMatcher = ::testing::_, ad_utility::source_location l = ad_utility::source_location::current()) { - return operator()(input, prefixMap_, l); + return operator()(input, prefixMap_, messageMatcher, l); } auto operator()( const string& input, SparqlQleverVisitor::PrefixMap prefixMap, + const testing::Matcher& messageMatcher = ::testing::_, ad_utility::source_location l = ad_utility::source_location::current()) { auto trace = generateLocationTrace(l); - EXPECT_THROW(parse(input, std::move(prefixMap), disableSomeChecks), - Exception) - << input; + AD_EXPECT_THROW_WITH_MESSAGE( + parse(input, std::move(prefixMap), disableSomeChecks), + messageMatcher); } }; @@ -955,6 +957,7 @@ TEST(SparqlParser, RDFLiteral) { } TEST(SparqlParser, SelectQuery) { + auto contains = [](const std::string& s) { return ::testing::HasSubstr(s); }; auto expectSelectQuery = ExpectCompleteParse<&Parser::selectQuery>{ {{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}}}; auto expectSelectQueryFails = ExpectParseFails<&Parser::selectQuery>{}; @@ -978,15 +981,16 @@ TEST(SparqlParser, SelectQuery) { m::pq::LimitOffset({10, 5}))); // ORDER BY - expectSelectQuery( - "SELECT ?x WHERE { ?x ?y ?z } HAVING (?x > 5) ORDER BY ?y ", - testing::AllOf( - m::SelectQuery(m::Select({Var{"?x"}}), DummyGraphPatternMatcher), - m::pq::Having({"(?x > 5)"}), m::pq::OrderKeys({{Var{"?y"}, false}}))); + expectSelectQuery("SELECT ?x WHERE { ?x ?y ?z } ORDER BY ?y ", + testing::AllOf(m::SelectQuery(m::Select({Var{"?x"}}), + DummyGraphPatternMatcher), + m::pq::OrderKeys({{Var{"?y"}, false}}))); // Ordering by a variable or expression which contains a variable that is not // visible in the query body is not allowed. - expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY ?x"); + expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY ?x", + contains("Variable ?x was used by " + "ORDER BY, but is not")); expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY (?x - 10)"); // Explicit GROUP BY @@ -1009,6 +1013,17 @@ TEST(SparqlParser, SelectQuery) { std::pair{"COUNT(?y) + ?z", Var{"?b"}}}), DummyGraphPatternMatcher))); + expectSelectQuery( + "SELECT (SUM(?x) as ?a) WHERE { ?x ?y ?z } GROUP " + "BY ?z ORDER BY (COUNT(?y) + ?z)", + testing::AllOf( + m::SelectQuery( + m::Select({std::pair{"SUM(?x)", Var{"?a"}}}, false, false, + {std::pair{"(COUNT(?y) + ?z)", + Var{"?_QLever_internal_variable_0"}}}), + DummyGraphPatternMatcher), + m::pq::OrderKeys({{Var{"?_QLever_internal_variable_0"}, false}}))); + // It is also illegal to reuse a variable from the body of a query with a // GROUP BY as the target of an alias, even if it is the aggregated variable // itself. @@ -1036,11 +1051,12 @@ TEST(SparqlParser, SelectQuery) { // Explicit GROUP BY but the second alias uses the target of the first alias // as input. - // TODO This is actually allowed by the SPARQL standard, but - // currently not yet supported by QLever. Implement this (for details see the - // comment in `ParsedQuery::addSolutionModifiers`. - expectSelectQueryFails( - "SELECT (?x AS ?z) (?z AS ?zz) WHERE { ?x

?y} GROUP BY ?x"); + expectSelectQuery( + "SELECT (?x AS ?a) (?a AS ?aa) WHERE { ?x ?y ?z} GROUP BY ?x", + testing::AllOf(m::SelectQuery(m::Select({std::pair{"?x", Var{"?a"}}, + std::pair{"?a", Var{"?aa"}}}), + DummyGraphPatternMatcher), + m::pq::GroupKeys({Var{"?x"}}))); // Implicit GROUP BY. expectSelectQuery( @@ -1066,21 +1082,36 @@ TEST(SparqlParser, SelectQuery) { m::Bind(Var{"?z"}, "?y")))); // No GROUP BY but the target of an alias is used twice. - expectSelectQueryFails("SELECT (?x AS ?z) (?x AS ?z) WHERE { ?x

?y}"); + expectSelectQueryFails("SELECT (?x AS ?z) (?x AS ?z) WHERE { ?x

?y}", + contains("The target ?z of an AS clause was already " + "used before in the SELECT clause.")); // `?x` is selected twice. Once as variable and once as the result of an // alias. This is not allowed. - expectSelectQueryFails("SELECT ?x (?y as ?x) WHERE { ?x ?y ?z }"); + expectSelectQueryFails( + "SELECT ?x (?y as ?x) WHERE { ?x ?y ?z }", + contains( + "The target ?x of an AS clause was already used in the query body.")); + + // HAVING is not allowed without GROUP BY + expectSelectQueryFails( + "SELECT ?x WHERE { ?x ?y ?z } HAVING (?x < 3)", + contains("HAVING clause is only supported in queries with GROUP BY")); // The target of the alias (`?y`) is already bound in the WHERE clause. This // is forbidden by the SPARQL standard. - expectSelectQueryFails("SELECT (?x AS ?y) WHERE { ?x ?y }"); + expectSelectQueryFails( + "SELECT (?x AS ?y) WHERE { ?x ?y }", + contains( + "The target ?y of an AS clause was already used in the query body.")); // Datasets are not supported. - expectSelectQueryFails("SELECT * FROM WHERE { ?x ?y ?z }"); + expectSelectQueryFails("SELECT * FROM WHERE { ?x ?y ?z }", + contains("FROM clauses are currently not supported")); } TEST(SparqlParser, ConstructQuery) { + auto contains = [](const std::string& s) { return ::testing::HasSubstr(s); }; auto expectConstructQuery = ExpectCompleteParse<&Parser::constructQuery>{ {{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}}}; auto expectConstructQueryFails = ExpectParseFails<&Parser::constructQuery>{}; @@ -1114,8 +1145,18 @@ TEST(SparqlParser, ConstructQuery) { m::ConstructQuery({{Var{"?a"}, Iri{""}, Var{"?b"}}}, m::GraphPattern())); // Datasets are not supported. - expectConstructQueryFails("CONSTRUCT { } FROM WHERE { ?a ?b ?c }"); - expectConstructQueryFails("CONSTRUCT FROM WHERE { }"); + expectConstructQueryFails( + "CONSTRUCT { } FROM WHERE { ?a ?b ?c }", + contains("FROM clauses are currently not supported by QLever.")); + expectConstructQueryFails( + "CONSTRUCT FROM WHERE { }", + contains("FROM clauses are currently not supported by QLever.")); + + // GROUP BY and ORDER BY, but the ordered variable is not grouped + expectConstructQueryFails( + "CONSTRUCT {?a } WHERE { ?a ?b ?c } GROUP BY ?a ORDER BY ?b", + contains("Variable ?b was used in an ORDER BY clause, but is neither " + "grouped nor created as an alias in the SELECT clause")); } TEST(SparqlParser, Query) { diff --git a/test/SparqlAntlrParserTestHelpers.h b/test/SparqlAntlrParserTestHelpers.h index ad270d6a6f..c9a1207441 100644 --- a/test/SparqlAntlrParserTestHelpers.h +++ b/test/SparqlAntlrParserTestHelpers.h @@ -94,6 +94,10 @@ inline void PrintTo(const parsedQuery::GraphPatternOperation& op, } } // namespace parsedQuery +inline void PrintTo(const Alias& alias, std::ostream* os) { + (*os) << alias.getDescriptor(); +} + inline void PrintTo(const ParsedQuery& pq, std::ostream* os) { (*os) << "is select query: " << pq.hasSelectClause() << '\n'; (*os) << "Variables: " << ::testing::PrintToString(pq.getVisibleVariables()) @@ -542,9 +546,14 @@ namespace detail { // This matcher cannot be trivially broken down into a combination of existing // googletest matchers because of the way how the aliases are stored in the // select clause. -MATCHER_P3(Select, distinct, reduced, selection, "") { +MATCHER_P4(Select, distinct, reduced, selection, hiddenAliases, "") { const auto& selectedVariables = arg.getSelectedVariables(); - if (selection.size() != selectedVariables.size()) return false; + if (selection.size() != selectedVariables.size()) { + *result_listener << "where the number of selected variables is " + << selectedVariables.size() << ", but " << selection.size() + << " were expected"; + return false; + } size_t alias_counter = 0; for (size_t i = 0; i < selection.size(); i++) { if (holds_alternative<::Variable>(selection[i])) { @@ -572,6 +581,26 @@ MATCHER_P3(Select, distinct, reduced, selection, "") { } } } + + size_t i = 0; + for (const auto& [descriptor, variable] : hiddenAliases) { + if (alias_counter >= arg.getAliases().size()) { + *result_listener << "where selected variables contain less aliases (" + << testing::PrintToString(alias_counter) + << ") than provided to matcher"; + return false; + } + if (descriptor != + arg.getAliases()[alias_counter]._expression.getDescriptor() || + variable != arg.getAliases()[alias_counter]._target) { + *result_listener << "where hidden alias#" << i << " = " + << testing::PrintToString( + arg.getAliases()[alias_counter]); + return false; + } + alias_counter++; + i++; + } return testing::ExplainMatchResult( testing::AllOf( AD_FIELD(p::SelectClause, distinct_, testing::Eq(distinct)), @@ -585,10 +614,11 @@ MATCHER_P3(Select, distinct, reduced, selection, "") { inline auto Select = [](std::vector>> selection, - bool distinct = false, - bool reduced = false) -> Matcher { - return testing::SafeMatcherCast( - detail::Select(distinct, reduced, std::move(selection))); + bool distinct = false, bool reduced = false, + std::vector> hiddenAliases = {}) + -> Matcher { + return testing::SafeMatcherCast(detail::Select( + distinct, reduced, std::move(selection), std::move(hiddenAliases))); }; // Return a `Matcher` that tests whether the descriptor of the expression of a diff --git a/test/SparqlExpressionTestHelpers.h b/test/SparqlExpressionTestHelpers.h index 7e8bfaede9..cd51902f5b 100644 --- a/test/SparqlExpressionTestHelpers.h +++ b/test/SparqlExpressionTestHelpers.h @@ -21,7 +21,7 @@ struct DummyExpression : public SparqlExpression { ExpressionResult evaluate(EvaluationContext*) const override { return std::move(_result); } - vector getUnaggregatedVariables() override { return {}; } + vector getUnaggregatedVariables() override { return {}; } string getCacheKey( [[maybe_unused]] const VariableToColumnMap& varColMap) const override { return "DummyDummyDummDumm"; diff --git a/test/SparqlExpressionTypesTest.cpp b/test/SparqlExpressionTypesTest.cpp new file mode 100644 index 0000000000..82e8629876 --- /dev/null +++ b/test/SparqlExpressionTypesTest.cpp @@ -0,0 +1,25 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#include "./util/AllocatorTestHelpers.h" +#include "./util/GTestHelpers.h" +#include "engine/sparqlExpressions/SparqlExpressionTypes.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using namespace sparqlExpression; + +TEST(SparqlExpressionTypes, expressionResultCopyIfNotVector) { + ExpressionResult a = 42.3; + ExpressionResult b = 1.0; + ASSERT_NO_THROW(b = copyExpressionResultIfNotVector(a)); + ASSERT_EQ(a, b); + + a = VectorWithMemoryLimit(ad_utility::testing::makeAllocator()); + ASSERT_ANY_THROW(b = copyExpressionResultIfNotVector(a)); + AD_EXPECT_THROW_WITH_MESSAGE( + b = copyExpressionResultIfNotVector(a), + ::testing::StartsWith( + "Tried to copy an expression result that is a vector.")); +} diff --git a/test/SparqlParserTest.cpp b/test/SparqlParserTest.cpp index 1690bc8a3e..9f1064c2b2 100644 --- a/test/SparqlParserTest.cpp +++ b/test/SparqlParserTest.cpp @@ -1062,6 +1062,8 @@ TEST(ParserTest, Order) { "SELECT ?x WHERE { ?x ?y } GROUP BY ?x ORDER BY ?y"), ParseException); } + // TODO This works now. Adapt the unit tests accordingly. + /* { // Ordering by an expression while grouping is currently not supported. EXPECT_THROW(SparqlParser::parseQuery( @@ -1076,6 +1078,7 @@ TEST(ParserTest, Order) { "?y ORDER BY (2 * ?y)"), ParseException); } + */ } TEST(ParserTest, Group) { diff --git a/test/util/GTestHelpers.h b/test/util/GTestHelpers.h index a9634d9557..a8b9561d11 100644 --- a/test/util/GTestHelpers.h +++ b/test/util/GTestHelpers.h @@ -28,6 +28,22 @@ ::testing::Field(#Member, &Class::Member, Matcher) #endif +// Similar to Gtest's `EXPECT_THROW`. Expect that executing `statement` throws +// an exception that inherits from `std::exception`, and that the error message +// of that exception, obtained by the `what()` member function, matches the +// given `errorMessageMatcher`. +#define AD_EXPECT_THROW_WITH_MESSAGE(statement, errorMessageMatcher) \ + try { \ + statement; \ + FAIL() << "No exception was thrown"; \ + } catch (const std::exception& e) { \ + EXPECT_THAT(e.what(), errorMessageMatcher) \ + << "The exception message does not match"; \ + } catch (...) { \ + FAIL() << "The thrown exception did not inherit from std::exception"; \ + } \ + void() + // _____________________________________________________________________________ // Add the given `source_location` to all gtest failure messages that occur, // while the return value is still in scope. It is important to bind the return