diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index 13400bf961..d18781f1ca 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -86,6 +86,10 @@ class GroupBy : public Operation { return {_subtree.get()}; } + // Getters for testing + const auto& groupByVariables() const { return _groupByVariables; } + const auto& aliases() const { return _aliases; } + struct HashMapAliasInformation; private: diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 8d9530af8a..d7e2d4cb21 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -351,10 +351,20 @@ vector QueryPlanner::getGroupByRow( aliases = pq.selectClause().getAliases(); } + // Inside a `GRAPH ?var {....}` clause, a `GROUP BY` must implicitly (also) + // group by the graph variable. + auto groupVariables = pq._groupByVariables; + if (activeGraphVariable_.has_value()) { + AD_CORRECTNESS_CHECK( + !ad_utility::contains(groupVariables, activeGraphVariable_.value()), + "Graph variable used inside the GRAPH clause, this " + "should have thrown an exception earlier"); + groupVariables.push_back(activeGraphVariable_.value()); + } // The GroupBy constructor automatically takes care of sorting the input if // necessary. groupByPlan._qet = makeExecutionTree( - _qec, pq._groupByVariables, std::move(aliases), parent._qet); + _qec, groupVariables, std::move(aliases), parent._qet); added.push_back(groupByPlan); } return added; diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index c7c4428928..51e505933b 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -21,6 +21,10 @@ class QueryPlanner { using vector = std::vector; ParsedQuery::DatasetClauses activeDatasetClauses_; + // The variable of the innermost `GRAPH ?var` clause that the planner + // currently is planning. + // Note: The behavior of only taking the innermost graph variable into account + // for nested `GRAPH` clauses is compliant with SPARQL 1.1. 
std::optional activeGraphVariable_; public: diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index 41526026c1..f22ab3b799 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -168,35 +168,46 @@ ad_utility::url_parser::ParsedRequest Server::parseHttpRequest( const ad_utility::httpUtils::HttpRequest auto& request) { // For an HTTP request, `request.target()` yields the HTTP Request-URI. // This is a concatenation of the URL path and the query strings. + using namespace ad_utility::url_parser::sparqlOperation; auto parsedUrl = ad_utility::url_parser::parseRequestTarget(request.target()); ad_utility::url_parser::ParsedRequest parsedRequest{ - std::move(parsedUrl.path_), std::move(parsedUrl.parameters_), - std::nullopt}; - auto extractQueryFromParameters = [&parsedRequest]() { - // Some valid requests (e.g. QLever's custom commands like retrieving index - // statistics) don't have a query. - if (parsedRequest.parameters_.contains("query")) { - parsedRequest.query_ = parsedRequest.parameters_["query"]; - parsedRequest.parameters_.erase("query"); - } - }; + std::move(parsedUrl.path_), std::move(parsedUrl.parameters_), None{}}; + + // Some valid requests (e.g. QLever's custom commands like retrieving index + // statistics) don't have a query. So an empty operation is not necessarily an + // error. 
+ auto setOperationIfSpecifiedInParams = + [&parsedRequest](string_view paramName) { + auto operation = ad_utility::url_parser::getParameterCheckAtMostOnce( + parsedRequest.parameters_, paramName); + if (operation.has_value()) { + parsedRequest.operation_ = Operation{operation.value()}; + parsedRequest.parameters_.erase(paramName); + } + }; if (request.method() == http::verb::get) { - extractQueryFromParameters(); + setOperationIfSpecifiedInParams.template operator()("query"); + if (parsedRequest.parameters_.contains("update")) { + throw std::runtime_error("SPARQL Update is not allowed as GET request."); + } return parsedRequest; } if (request.method() == http::verb::post) { // For a POST request, the content type *must* be either - // "application/x-www-form-urlencoded" (1) or "application/sparql-query" - // (2). + // "application/x-www-form-urlencoded" (1), "application/sparql-query" + // (2) or "application/sparql-update" (3). // // (1) Section 2.1.2: The body of the POST request contains *all* parameters - // (including the query) in an encoded form (just like in the part of a GET - // request after the "?"). + // (including the query or update) in an encoded form (just like in the part + // of a GET request after the "?"). // // (2) Section 2.1.3: The body of the POST request contains *only* the // unencoded SPARQL query. There may be additional HTTP query parameters. // + // (3) Section 2.2.2: The body of the POST request contains *only* the + // unencoded SPARQL update. There may be additional HTTP query parameters. 
+ // // Reference: https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321 std::string_view contentType = request.base()[http::field::content_type]; LOG(DEBUG) << "Content-type: \"" << contentType << "\"" << std::endl; @@ -204,6 +215,8 @@ ad_utility::url_parser::ParsedRequest Server::parseHttpRequest( "application/x-www-form-urlencoded"; static constexpr std::string_view contentTypeSparqlQuery = "application/sparql-query"; + static constexpr std::string_view contentTypeSparqlUpdate = + "application/sparql-update"; // Note: For simplicity we only check via `starts_with`. This ignores // additional parameters like `application/sparql-query;charset=utf8`. We @@ -237,18 +250,28 @@ ad_utility::url_parser::ParsedRequest Server::parseHttpRequest( parsedRequest.parameters_ = ad_utility::url_parser::paramsToMap(query->params()); - extractQueryFromParameters(); + if (parsedRequest.parameters_.contains("query") && + parsedRequest.parameters_.contains("update")) { + throw std::runtime_error( + R"(Request must only contain one of "query" and "update".)"); + } + setOperationIfSpecifiedInParams.template operator()("query"); + setOperationIfSpecifiedInParams.template operator()("update"); return parsedRequest; } if (contentType.starts_with(contentTypeSparqlQuery)) { - parsedRequest.query_ = request.body(); + parsedRequest.operation_ = Query{request.body()}; return parsedRequest; } - throw std::runtime_error( - absl::StrCat("POST request with content type \"", contentType, - "\" not supported (must be \"", contentTypeUrlEncoded, - "\" or \"", contentTypeSparqlQuery, "\")")); + if (contentType.starts_with(contentTypeSparqlUpdate)) { + parsedRequest.operation_ = Update{request.body()}; + return parsedRequest; + } + throw std::runtime_error(absl::StrCat( + "POST request with content type \"", contentType, + "\" not supported (must be \"", contentTypeUrlEncoded, "\", \"", + contentTypeSparqlQuery, "\" or \"", contentTypeSparqlUpdate, "\")")); } std::ostringstream requestMethodName; 
requestMethodName << request.method(); @@ -322,9 +345,10 @@ Awaitable Server::process( checkParameterNotPresent("named-graph-uri"); auto checkParameter = [&parameters](std::string_view key, - std::optional value, + std::optional value, bool accessAllowed = true) { - return Server::checkParameter(parameters, key, value, accessAllowed); + return Server::checkParameter(parameters, key, std::move(value), + accessAllowed); }; // Check the access token. If an access token is provided and the check fails, @@ -420,39 +444,52 @@ Awaitable Server::process( } } - // If "query" parameter is given, process query. - if (parsedHttpRequest.query_.has_value()) { + auto visitQuery = [&checkParameter, &accessTokenOk, &request, &send, + &parameters, &requestTimer, + this](ad_utility::url_parser::sparqlOperation::Query query) + -> Awaitable { if (auto timeLimit = co_await verifyUserSubmittedQueryTimeout( checkParameter("timeout", std::nullopt), accessTokenOk, request, send)) { - co_return co_await processQuery( - parameters, parsedHttpRequest.query_.value(), requestTimer, - std::move(request), send, timeLimit.value()); - + co_return co_await processQuery(parameters, query.query_, requestTimer, + std::move(request), send, + timeLimit.value()); } else { // If the optional is empty, this indicates an error response has been // sent to the client already. We can stop here. co_return; } - } + }; + auto visitUpdate = [](const ad_utility::url_parser::sparqlOperation::Update&) + -> Awaitable { + throw std::runtime_error( + "SPARQL 1.1 Update is currently not supported by QLever."); + }; + auto visitNone = + [&response, &send, &request]( + ad_utility::url_parser::sparqlOperation::None) -> Awaitable { + // If there was no "query", but any of the URL parameters processed before + // produced a `response`, send that now. Note that if multiple URL + // parameters were processed, only the `response` from the last one is sent. 
+ if (response.has_value()) { + co_return co_await send(std::move(response.value())); + } - // If there was no "query", but any of the URL parameters processed before - // produced a `response`, send that now. Note that if multiple URL parameters - // were processed, only the `response` from the last one is sent. - if (response.has_value()) { - co_return co_await send(std::move(response.value())); - } + // At this point, if there is a "?" in the query string, it means that there + // are URL parameters which QLever does not know or did not process. + if (request.target().find("?") != std::string::npos) { + throw std::runtime_error( + "Request with URL parameters, but none of them could be processed"); + } + // No path matched up until this point, so return 404 to indicate the client + // made an error and the server will not serve anything else. + co_return co_await send( + createNotFoundResponse("Unknown path", std::move(request))); + }; - // At this point, if there is a "?" in the query string, it means that there - // are URL parameters which QLever does not know or did not process. - if (request.target().find("?") != std::string::npos) { - throw std::runtime_error( - "Request with URL parameters, but none of them could be processed"); - } - // No path matched up until this point, so return 404 to indicate the client - // made an error and the server will not serve anything else. 
- co_return co_await send( - createNotFoundResponse("Unknown path", std::move(request))); + co_return co_await std::visit( + ad_utility::OverloadCallOperator{visitQuery, visitUpdate, visitNone}, + parsedHttpRequest.operation_); } // _____________________________________________________________________________ @@ -629,7 +666,7 @@ Awaitable Server::sendStreamableResponse( // ____________________________________________________________________________ boost::asio::awaitable Server::processQuery( - const ParamValueMap& params, const string& query, + const ad_utility::url_parser::ParamValueMap& params, const string& query, ad_utility::Timer& requestTimer, const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, TimeLimit timeLimit) { @@ -657,7 +694,9 @@ boost::asio::awaitable Server::processQuery( try { auto containsParam = [&params](const std::string& param, const std::string& expected) { - return params.contains(param) && params.at(param) == expected; + auto parameterValue = + ad_utility::url_parser::getParameterCheckAtMostOnce(params, param); + return parameterValue.has_value() && parameterValue.value() == expected; }; const bool pinSubtrees = containsParam("pinsubtrees", "true"); const bool pinResult = containsParam("pinresult", "true"); @@ -695,9 +734,13 @@ boost::asio::awaitable Server::processQuery( mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); } - std::optional maxSend = - params.contains("send") ? 
std::optional{std::stoul(params.at("send"))} - : std::nullopt; + // TODO use std::optional::transform + std::optional maxSend = std::nullopt; + auto parameterValue = + ad_utility::url_parser::getParameterCheckAtMostOnce(params, "send"); + if (parameterValue.has_value()) { + maxSend = std::stoul(parameterValue.value()); + } // Limit JSON requests by default if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson || mediaType == MediaType::qleverJson)) { @@ -733,8 +776,9 @@ boost::asio::awaitable Server::processQuery( auto [cancellationHandle, cancelTimeoutOnDestruction] = setupCancellationHandle(messageSender.getQueryId(), timeLimit); - plannedQuery = - co_await parseAndPlan(query, qec, cancellationHandle, timeLimit); + auto queryDatasets = ad_utility::url_parser::parseDatasetClauses(params); + plannedQuery = co_await parseAndPlan(query, queryDatasets, qec, + cancellationHandle, timeLimit); AD_CORRECTNESS_CHECK(plannedQuery.has_value()); auto& qet = plannedQuery.value().queryExecutionTree_; qet.isRoot() = true; // allow pinning of the final result @@ -859,8 +903,9 @@ Awaitable Server::computeInNewThread(Function function, // _____________________________________________________________________________ net::awaitable> Server::parseAndPlan( - const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit) { + const std::string& query, const vector& queryDatasets, + QueryExecutionContext& qec, SharedCancellationHandle handle, + TimeLimit timeLimit) { auto handleCopy = handle; // The usage of an `optional` here is required because of a limitation in @@ -870,10 +915,16 @@ net::awaitable> Server::parseAndPlan( // probably related to issues in GCC's coroutine implementation. 
return computeInNewThread( [&query, &qec, enablePatternTrick = enablePatternTrick_, - handle = std::move(handle), - timeLimit]() mutable -> std::optional { + handle = std::move(handle), timeLimit, + &queryDatasets]() mutable -> std::optional { auto pq = SparqlParser::parseQuery(query); handle->throwIfCancelled(); + // SPARQL Protocol 2.1.4 specifies that the dataset from the query + // parameters overrides the dataset from the query itself. + if (!queryDatasets.empty()) { + pq.datasetClauses_ = + parsedQuery::DatasetClauses::fromClauses(queryDatasets); + } QueryPlanner qp(&qec, handle); qp.setEnablePatternTrick(enablePatternTrick); auto qet = qp.createExecutionTree(pq); @@ -914,18 +965,22 @@ bool Server::checkAccessToken( // _____________________________________________________________________________ -std::optional Server::checkParameter( - const ad_utility::HashMap& parameters, - std::string_view key, std::optional value, +std::optional Server::checkParameter( + const ad_utility::url_parser::ParamValueMap& parameters, + std::string_view key, std::optional value, bool accessAllowed) { - if (!parameters.contains(key)) { + auto param = + ad_utility::url_parser::getParameterCheckAtMostOnce(parameters, key); + if (!param.has_value()) { return std::nullopt; } + std::string parameterValue = param.value(); + // If value is given, but not equal to param value, return std::nullopt. If // no value is given, set it to param value. if (value == std::nullopt) { - value = parameters.at(key); - } else if (value != parameters.at(key)) { + value = parameterValue; + } else if (value != parameterValue) { return std::nullopt; } // Now that we have the value, check if there is a problem with the access. diff --git a/src/engine/Server.h b/src/engine/Server.h index 9f492b3c58..c863e93771 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -38,8 +38,6 @@ class Server { virtual ~Server() = default; - using ParamValueMap = ad_utility::HashMap; - private: //! 
Initialize the server. void initialize(const string& indexBaseName, bool useText, @@ -131,7 +129,7 @@ class Server { /// \param timeLimit Duration in seconds after which the query will be /// cancelled. Awaitable processQuery( - const ParamValueMap& params, const string& query, + const ad_utility::url_parser::ParamValueMap& params, const string& query, ad_utility::Timer& requestTimer, const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, TimeLimit timeLimit); @@ -184,8 +182,9 @@ class Server { /// or throws an exception. We still need to return an `optional` though for /// technical reasons that are described in the definition of this function. net::awaitable> parseAndPlan( - const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit); + const std::string& query, const vector& queryDatasets, + QueryExecutionContext& qec, SharedCancellationHandle handle, + TimeLimit timeLimit); /// Acquire the `CancellationHandle` for the given `QueryId`, start the /// watchdog and call `cancelAfterDeadline` to set the timeout after @@ -214,10 +213,11 @@ class Server { /// the key determines the kind of action. If the key is not found, always /// return `std::nullopt`. If `accessAllowed` is false and a value is present, /// throw an exception. - static std::optional checkParameter( - const ad_utility::HashMap& parameters, - std::string_view key, std::optional value, + static std::optional checkParameter( + const ad_utility::url_parser::ParamValueMap& parameters, + std::string_view key, std::optional value, bool accessAllowed); + FRIEND_TEST(ServerTest, checkParameter); /// Check if user-provided timeout is authorized with a valid access-token or /// lower than the server default. 
Return an empty optional and send a 403 diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 72b7293e71..200924e119 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -322,31 +322,35 @@ std::optional Service::getSiblingValuesClause() const { checkCancellation(); - ad_utility::HashSet rowSet; - - std::string values = " { "; - for (size_t rowIndex = 0; rowIndex < siblingResult->idTable().size(); - ++rowIndex) { + // Creates a single row of the values clause or an empty string on error. + auto createValueRow = [&](size_t rowIndex) -> std::string { std::string row = "("; for (const auto& columnIdx : commonColumnIndices) { - const auto& optionalString = ExportQueryExecutionTrees::idToStringAndType( + const auto& optStr = idToValueForValuesClause( siblingTree_->getRootOperation()->getIndex(), siblingResult->idTable()(rowIndex, columnIdx), siblingResult->localVocab()); - if (optionalString.has_value()) { - absl::StrAppend(&row, optionalString.value().first, " "); + if (!optStr.has_value()) { + return ""; } + absl::StrAppend(&row, optStr.value(), " "); } row.back() = ')'; + return row; + }; - if (rowSet.contains(row)) { + ad_utility::HashSet rowSet; + std::string values = " { "; + for (size_t rowIndex = 0; rowIndex < siblingResult->idTable().size(); + ++rowIndex) { + std::string row = createValueRow(rowIndex); + if (row.empty() || rowSet.contains(row)) { continue; } - rowSet.insert(row); - absl::StrAppend(&values, row, " "); + absl::StrAppend(&values, row, " "); checkCancellation(); } @@ -455,3 +459,37 @@ void Service::throwErrorWithContext(std::string_view msg, (last100.empty() ? 
"'" : absl::StrCat(", last 100 bytes: '", last100, "'")))); } + +// ____________________________________________________________________________ +std::optional Service::idToValueForValuesClause( + const Index& index, Id id, const LocalVocab& localVocab) { + using enum Datatype; + const auto& optionalStringAndXsdType = + ExportQueryExecutionTrees::idToStringAndType(index, id, localVocab); + if (!optionalStringAndXsdType.has_value()) { + AD_CORRECTNESS_CHECK(id.getDatatype() == Undefined); + return "UNDEF"; + } + const auto& [value, xsdType] = optionalStringAndXsdType.value(); + + switch (id.getDatatype()) { + case BlankNodeIndex: + // Blank nodes are not allowed in a values clause. Additionally blank + // nodes across a SERVICE endpoint are disjoint anyway, so rows that + // contain blank nodes will never create matches and we can safely omit + // them. + return std::nullopt; + case Int: + case Double: + case Bool: + return value; + default: + if (xsdType) { + return absl::StrCat("\"", value, "\"^^<", xsdType, ">"); + } else if (value.starts_with('<')) { + return value; + } else { + return RdfEscaping::validRDFLiteralFromNormalized(value); + } + } +} diff --git a/src/engine/Service.h b/src/engine/Service.h index 8a10f5efd9..68643aae97 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -102,6 +102,11 @@ class Service : public Operation { static TripleComponent bindingToTripleComponent( const nlohmann::json& binding); + // Create a value for the VALUES-clause used in `getSiblingValuesClause` from + // id. If the id is of type blank node `std::nullopt` is returned. + static std::optional idToValueForValuesClause( + const Index& index, Id id, const LocalVocab& localVocab); + private: // The string returned by this function is used as cache key. 
std::string getCacheKeyImpl() const override; diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index 928c4f3ea4..ee16b21556 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -15,12 +15,27 @@ #include "engine/sparqlExpressions/SparqlExpressionPimpl.h" #include "parser/RdfEscaping.h" +#include "parser/sparqlParser/SparqlQleverVisitor.h" #include "util/Conversions.h" #include "util/TransparentFunctors.h" using std::string; using std::vector; +// _____________________________________________________________________________ +parsedQuery::DatasetClauses parsedQuery::DatasetClauses::fromClauses( + const std::vector& clauses) { + DatasetClauses result; + for (auto& [dataset, isNamed] : clauses) { + auto& graphs = isNamed ? result.namedGraphs_ : result.defaultGraphs_; + if (!graphs.has_value()) { + graphs.emplace(); + } + graphs.value().insert(dataset); + } + return result; +} + // _____________________________________________________________________________ string SparqlPrefix::asString() const { std::ostringstream os; diff --git a/src/parser/ParsedQuery.h b/src/parser/ParsedQuery.h index eb6bb114c9..94f036bb14 100644 --- a/src/parser/ParsedQuery.h +++ b/src/parser/ParsedQuery.h @@ -37,6 +37,9 @@ using std::string; using std::vector; +// Forward declaration +struct DatasetClause; + namespace parsedQuery { // A struct for the FROM and FROM NAMED clauses; struct DatasetClauses { @@ -44,6 +47,8 @@ struct DatasetClauses { ScanSpecificationAsTripleComponent::Graphs defaultGraphs_{}; // FROM NAMED clauses. 
ScanSpecificationAsTripleComponent::Graphs namedGraphs_{}; + + static DatasetClauses fromClauses(const std::vector& clauses); }; } // namespace parsedQuery diff --git a/src/parser/RdfEscaping.h b/src/parser/RdfEscaping.h index 5c9ca29180..36bbcd74f7 100644 --- a/src/parser/RdfEscaping.h +++ b/src/parser/RdfEscaping.h @@ -91,9 +91,6 @@ NormalizedRDFString normalizeRDFLiteral(std::string_view origLiteral); * and "be"ta"@en becomes "be\"ta"@en. * * If the `normLiteral` is not a literal, an AD_CONTRACT_CHECK will fail. - * - * TODO: This function currently only handles the escaping of " inside the - * literal, no other characters. Is that enough? */ std::string validRDFLiteralFromNormalized(std::string_view normLiteral); diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 4e4222a151..84a9d57f94 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -233,26 +233,11 @@ Alias Visitor::visit(Parser::AliasWithoutBracketsContext* ctx) { return {visitExpressionPimpl(ctx->expression()), visit(ctx->var())}; } -namespace { -// Add the `datasetClauses` to the `targetClauses`. -void addDatasetClauses( - ParsedQuery::DatasetClauses& targetClauses, - const std::vector& datasetClauses) { - for (auto& [dataset, isNamed] : datasetClauses) { - auto& graphs = - isNamed ? 
targetClauses.namedGraphs_ : targetClauses.defaultGraphs_; - if (!graphs.has_value()) { - graphs.emplace(); - } - graphs.value().insert(std::move(dataset)); - } -} -} // namespace - // ____________________________________________________________________________________ ParsedQuery Visitor::visit(Parser::ConstructQueryContext* ctx) { ParsedQuery query; - addDatasetClauses(query.datasetClauses_, visitVector(ctx->datasetClause())); + query.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses( + visitVector(ctx->datasetClause())); if (ctx->constructTemplate()) { query._clause = visit(ctx->constructTemplate()) .value_or(parsedQuery::ConstructClause{}); @@ -279,7 +264,7 @@ ParsedQuery Visitor::visit(const Parser::AskQueryContext* ctx) { } // ____________________________________________________________________________________ -Visitor::DatasetClause Visitor::visit(Parser::DatasetClauseContext* ctx) { +DatasetClause Visitor::visit(Parser::DatasetClauseContext* ctx) { if (ctx->defaultGraphClause()) { return {.dataset_ = visit(ctx->defaultGraphClause()), .isNamed_ = false}; } else { @@ -946,8 +931,8 @@ void Visitor::visit(Parser::PrefixDeclContext* ctx) { // ____________________________________________________________________________________ ParsedQuery Visitor::visit(Parser::SelectQueryContext* ctx) { parsedQuery_._clause = visit(ctx->selectClause()); - addDatasetClauses(parsedQuery_.datasetClauses_, - visitVector(ctx->datasetClause())); + parsedQuery_.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses( + visitVector(ctx->datasetClause())); auto [pattern, visibleVariables] = visit(ctx->whereClause()); parsedQuery_._rootGraphPattern = std::move(pattern); parsedQuery_.registerVariablesVisibleInQueryBody(visibleVariables); diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 4a8052aec7..e68087b056 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ 
b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -27,6 +27,15 @@ class Reversed { auto end() { return _iterable.rend(); } }; +// A named or default graph +struct DatasetClause { + TripleComponent::Iri dataset_; + bool isNamed_; + + // For testing + bool operator==(const DatasetClause& other) const = default; +}; + /** * This is a visitor that takes the parse tree from ANTLR and transforms it into * a `ParsedQuery`. @@ -147,11 +156,6 @@ class SparqlQleverVisitor { [[noreturn]] static ParsedQuery visit(const Parser::AskQueryContext* ctx); - struct DatasetClause { - TripleComponent::Iri dataset_; - bool isNamed_; - }; - DatasetClause visit(Parser::DatasetClauseContext* ctx); TripleComponent::Iri visit(Parser::DefaultGraphClauseContext* ctx); diff --git a/src/util/http/CMakeLists.txt b/src/util/http/CMakeLists.txt index 23cdb2d44f..37d9a86a76 100644 --- a/src/util/http/CMakeLists.txt +++ b/src/util/http/CMakeLists.txt @@ -6,4 +6,4 @@ add_library(http HttpServer.h HttpClient.h HttpClient.cpp HttpUtils.h HttpUtils. websocket/WebSocketSession.cpp websocket/MessageSender.cpp websocket/QueryToSocketDistributor.cpp websocket/QueryHub.cpp websocket/UpdateFetcher.cpp) -qlever_target_link_libraries(http util mediaTypes httpParser OpenSSL::SSL OpenSSL::Crypto) +qlever_target_link_libraries(http parser util mediaTypes httpParser OpenSSL::SSL OpenSSL::Crypto) diff --git a/src/util/http/UrlParser.cpp b/src/util/http/UrlParser.cpp index eaa26f9c3f..3149f97aec 100644 --- a/src/util/http/UrlParser.cpp +++ b/src/util/http/UrlParser.cpp @@ -2,11 +2,25 @@ // Chair of Algorithms and Data Structures. 
// Authors: Johannes Kalmbach // Hannah Bast +// Julian Mundhahs #include "UrlParser.h" using namespace ad_utility::url_parser; +std::optional ad_utility::url_parser::getParameterCheckAtMostOnce( + const ParamValueMap& map, string_view key) { + if (!map.contains(key)) { + return std::nullopt; + } + auto& value = map.at(key); + if (value.size() != 1) { + throw std::runtime_error( + absl::StrCat("Parameter \"", key, + "\" must be given exactly once. Is: ", value.size())); + } + return value.front(); +} // _____________________________________________________________________________ ParsedUrl ad_utility::url_parser::parseRequestTarget(std::string_view target) { auto urlResult = boost::urls::parse_origin_form(target); @@ -20,16 +34,30 @@ ParsedUrl ad_utility::url_parser::parseRequestTarget(std::string_view target) { } // _____________________________________________________________________________ -ad_utility::HashMap -ad_utility::url_parser::paramsToMap(boost::urls::params_view params) { - ad_utility::HashMap result; +ParamValueMap ad_utility::url_parser::paramsToMap( + boost::urls::params_view params) { + ParamValueMap result; for (const auto& [key, value, _] : params) { - auto [blockingElement, wasInserted] = result.insert({key, value}); - if (!wasInserted) { - throw std::runtime_error( - absl::StrCat("HTTP parameter \"", key, "\" is set twice. 
It is \"", - blockingElement->second, "\" and \"", value, "\"")); - } + result[key].push_back(value); } return result; } + +// _____________________________________________________________________________ +std::vector ad_utility::url_parser::parseDatasetClauses( + const ParamValueMap& params) { + std::vector datasetClauses; + auto readDatasetClauses = [&params, &datasetClauses](const std::string& key, + bool isNamed) { + if (params.contains(key)) { + for (const auto& uri : params.at(key)) { + datasetClauses.emplace_back( + ad_utility::triple_component::Iri::fromIrirefWithoutBrackets(uri), + isNamed); + } + } + }; + readDatasetClauses("default-graph-uri", false); + readDatasetClauses("named-graph-uri", true); + return datasetClauses; +} diff --git a/src/util/http/UrlParser.h b/src/util/http/UrlParser.h index 3743d6b478..bc1374504f 100644 --- a/src/util/http/UrlParser.h +++ b/src/util/http/UrlParser.h @@ -2,50 +2,85 @@ // Chair of Algorithms and Data Structures. // Authors: Johannes Kalmbach // Hannah Bast +// Julian Mundhahs #ifndef QLEVER_URLPARSER_H #define QLEVER_URLPARSER_H #include -#include #include #include -#include "../HashMap.h" +#include "parser/sparqlParser/SparqlQleverVisitor.h" +#include "util/HashMap.h" /** * /brief Some helpers to parse request URLs in QLever. */ namespace ad_utility::url_parser { -// TODO: There can be multiple values for a HTTP query parameter. Some SPARQL -// features require setting a parameter multiple times. Change the interface -// s.t. this is supported. + +// A map that stores the values for parameters. Parameters can be specified +// multiple times with different values. +using ParamValueMap = ad_utility::HashMap>; + +// Extracts a parameter that may be present at most once. If the parameter is +// not present std::nullopt is returned. If the parameter is present multiple +// times an exception is thrown. +std::optional getParameterCheckAtMostOnce(const ParamValueMap& map, + string_view key); // A parsed URL. 
// - `path_` is the URL path -// - `parameters_` is a hashmap of the HTTP Query parameters +// - `parameters_` is a map of the HTTP Query parameters struct ParsedUrl { std::string path_; - ad_utility::HashMap parameters_; + ParamValueMap parameters_; +}; + +// The different SPARQL operations that a `ParsedRequest` can represent. +namespace sparqlOperation { +// A SPARQL 1.1 Query +struct Query { + std::string query_; + + bool operator==(const Query& rhs) const = default; }; +// A SPARQL 1.1 Update +struct Update { + std::string update_; + + bool operator==(const Update& rhs) const = default; +}; + +// No operation. This can happen for QLever's custom operations (e.g. +// `cache-stats`). These requests have no operation but are still valid. +struct None { + bool operator==(const None& rhs) const = default; +}; +} // namespace sparqlOperation + // Representation of parsed HTTP request. // - `path_` is the URL path // - `parameters_` is a hashmap of the parameters -// - `query_` contains the Query +// - `operation_` the operation that should be performed struct ParsedRequest { std::string path_; - ad_utility::HashMap parameters_; - std::optional query_; + ParamValueMap parameters_; + std::variant + operation_; }; -// Parse the URL path and the URL query parameters of a HTTP Request target. +// Parse the URL path and the URL query parameters of an HTTP Request target. ParsedUrl parseRequestTarget(std::string_view target); -// Convert the HTTP Query parameters `params` to a hashmap. Throw an error -// if a key is included twice. -ad_utility::HashMap paramsToMap( - boost::urls::params_view params); +// Convert the HTTP Query parameters `params` to a ParamValueMap (a map from +// string to vectors of strings). +ParamValueMap paramsToMap(boost::urls::params_view params); + +// Parse default and named graphs URIs from the parameters. 
+std::vector parseDatasetClauses(const ParamValueMap& params); } // namespace ad_utility::url_parser #endif // QLEVER_URLPARSER_H diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 69c4e7d8dd..ed8482d66c 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1157,7 +1157,7 @@ TEST(ExportQueryExecutionTrees, CornerCases) { std::string queryNoVariablesVisible = "SELECT ?not ?known WHERE { ?p ?o}"; auto resultNoColumns = runJSONQuery(kg, queryNoVariablesVisible, ad_utility::MediaType::sparqlJson); - ASSERT_TRUE(resultNoColumns["result"]["bindings"].empty()); + ASSERT_TRUE(resultNoColumns["results"]["bindings"].empty()); auto qec = ad_utility::testing::getQec(kg); AD_EXPECT_THROW_WITH_MESSAGE( diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 3f2d6475b7..eebb72e124 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1291,16 +1291,33 @@ TEST(QueryPlanner, DatasetClause) { h::expect("SELECT * FROM WHERE { GRAPH ?g { }}", scan("", "", "", {}, std::nullopt, varG, graphCol)); + // `GROUP BY` inside a `GRAPH ?g` clause. + // We use the `UnorderedJoins` matcher, because the index scan has to be + // resorted by the graph column. + h::expect( + "SELECT * FROM FROM NAMED { GRAPH ?g " + "{ " + "{SELECT ?p { ?p } GROUP BY ?p}" + "} }", + h::GroupBy({Variable{"?p"}, Variable{"?g"}}, {}, + h::UnorderedJoins( + scan("", "?p", "", {}, g2, varG, graphCol)))); + // A complex example with graph variables. h::expect( "SELECT * FROM FROM NAMED { ?p . 
{ ?p } GRAPH ?g " "{ ?p " - "{SELECT * { ?p }}} ?p }", - h::UnorderedJoins(scan("", "?p", "", {}, g1), - scan("", "?p", "", {}, g1), - scan("", "?p", "", {}, g2, varG, graphCol), - scan("", "?p", "", {}, g2, varG, graphCol), - scan("", "?p", "", {}, g1))); + "{SELECT * { ?p }}" + "{SELECT ?p { ?p } GROUP BY ?p}" + "} ?p }", + h::UnorderedJoins( + scan("", "?p", "", {}, g1), scan("", "?p", "", {}, g1), + scan("", "?p", "", {}, g2, varG, graphCol), + scan("", "?p", "", {}, g2, varG, graphCol), + h::GroupBy({Variable{"?p"}, Variable{"?g"}}, {}, + h::UnorderedJoins( + scan("", "?p", "", {}, g2, varG, graphCol))), + scan("", "?p", "", {}, g1))); // We currently don't support repeating the graph variable inside the // graph clause AD_EXPECT_THROW_WITH_MESSAGE( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index b954fa3ae8..497f23a584 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -14,6 +14,7 @@ #include "engine/CartesianProductJoin.h" #include "engine/CountAvailablePredicates.h" #include "engine/Filter.h" +#include "engine/GroupBy.h" #include "engine/IndexScan.h" #include "engine/Join.h" #include "engine/Minus.h" @@ -56,6 +57,13 @@ QetMatcher RootOperation(auto matcher) { WhenDynamicCastTo(matcher)); } +// Match the `getChildren` method of an `Operation`. +inline Matcher children( + const std::same_as auto&... childMatchers) { + return Property("getChildren", &Operation::getChildren, + ElementsAre(Pointee(childMatchers)...)); +} + // Return a matcher that test whether a given `QueryExecutionTree` contains a // `OperationType` operation the children of which match the // `childMatcher`s. Note that the child matchers are not ordered. @@ -72,9 +80,7 @@ inline auto MatchTypeAndUnorderedChildren = template inline auto MatchTypeAndOrderedChildren = [](const std::same_as auto&... 
childMatchers) { - return RootOperation( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)))); + return RootOperation(AllOf(children(childMatchers...))); }; /// Return a matcher that checks that a given `QueryExecutionTree` consists of a @@ -128,8 +134,7 @@ constexpr auto TextLimit = [](const size_t n, const QetMatcher& childMatcher, const vector& entityVars, const vector& scoreVars) -> QetMatcher { return RootOperation<::TextLimit>(AllOf( - AD_PROPERTY(::TextLimit, getTextLimit, Eq(n)), - AD_PROPERTY(Operation, getChildren, ElementsAre(Pointee(childMatcher))), + AD_PROPERTY(::TextLimit, getTextLimit, Eq(n)), children(childMatcher), AD_PROPERTY(::TextLimit, getTextRecordVariable, Eq(textRecVar)), AD_PROPERTY(::TextLimit, getEntityVariables, UnorderedElementsAreArray(entityVars)), @@ -172,8 +177,7 @@ inline auto Bind = [](const QetMatcher& childMatcher, AD_PROPERTY(sparqlExpression::SparqlExpressionPimpl, getDescriptor, Eq(expression)))); return RootOperation<::Bind>(AllOf( - AD_PROPERTY(::Bind, bind, AllOf(innerMatcher)), - AD_PROPERTY(Operation, getChildren, ElementsAre(Pointee(childMatcher))))); + AD_PROPERTY(::Bind, bind, AllOf(innerMatcher)), children(childMatcher))); }; // Matcher for a `CountAvailablePredicates` operation. The case of 0 children @@ -182,15 +186,15 @@ inline auto CountAvailablePredicates = [](size_t subjectColumnIdx, const Variable& predicateVar, const Variable& countVar, const std::same_as auto&... 
childMatchers) - requires(sizeof...(childMatchers) <= 1) { + requires(sizeof...(childMatchers) <= 1) +{ return RootOperation<::CountAvailablePredicates>(AllOf( AD_PROPERTY(::CountAvailablePredicates, subjectColumnIndex, Eq(subjectColumnIdx)), AD_PROPERTY(::CountAvailablePredicates, predicateVariable, Eq(predicateVar)), AD_PROPERTY(::CountAvailablePredicates, countVariable, Eq(countVar)), - AD_PROPERTY(Operation, getChildren, - ElementsAre(Pointee(childMatchers)...)))); + children(childMatchers...))); }; // Same as above, but the subject, predicate, and object are passed in as @@ -279,8 +283,7 @@ inline auto TransitivePath = [](TransitivePathSide left, TransitivePathSide right, size_t minDist, size_t maxDist, const std::same_as auto&... childMatchers) { return RootOperation<::TransitivePathBase>( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), + AllOf(children(childMatchers...), AD_PROPERTY(TransitivePathBase, getMinDist, Eq(minDist)), AD_PROPERTY(TransitivePathBase, getMaxDist, Eq(maxDist)), AD_PROPERTY(TransitivePathBase, getLeft, @@ -294,12 +297,32 @@ inline auto SpatialJoin = [](size_t maxDist, size_t maxResults, const std::same_as auto&... childMatchers) { return RootOperation<::SpatialJoin>( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), + AllOf(children(childMatchers...), AD_PROPERTY(SpatialJoin, onlyForTestingGetConfig, Eq(std::pair(maxDist, maxResults))))); }; +// Match a GroupBy operation +static constexpr auto GroupBy = + [](const std::vector& groupByVariables, + const std::vector& aliases, + const QetMatcher& childMatcher) -> QetMatcher { + // TODO Also test the aliases. 
+ auto aliasesToStrings = [](const std::vector& aliases) { + std::vector result; + std::ranges::transform(aliases, std::back_inserter(result), + &Alias::getDescriptor); + return result; + }; + + return RootOperation<::GroupBy>( + AllOf(children(childMatcher), + AD_PROPERTY(::GroupBy, groupByVariables, + UnorderedElementsAreArray(groupByVariables)), + AD_PROPERTY(::GroupBy, aliases, + ResultOf(aliasesToStrings, ContainerEq(aliases))))); +}; + // Match a sort operation. Currently, this is only required by the binary search // version of the transitive path operation. This matcher checks only the // children of the sort operation. @@ -310,8 +333,7 @@ inline auto Sort = MatchTypeAndUnorderedChildren<::Sort>; constexpr auto Filter = [](std::string_view descriptor, const QetMatcher& childMatcher) { return RootOperation<::Filter>( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatcher))), + AllOf(children(childMatcher), AD_PROPERTY(::Operation, getDescriptor, HasSubstr(descriptor)))); }; @@ -319,8 +341,7 @@ constexpr auto Filter = [](std::string_view descriptor, constexpr auto OrderBy = [](const ::OrderBy::SortedVariables& sortedVariables, const QetMatcher& childMatcher) { return RootOperation<::OrderBy>( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatcher))), + AllOf(children(childMatcher), AD_PROPERTY(::OrderBy, getSortedVariables, Eq(sortedVariables)))); }; diff --git a/test/ServerTest.cpp b/test/ServerTest.cpp index daeb2527e7..4f98de679d 100644 --- a/test/ServerTest.cpp +++ b/test/ServerTest.cpp @@ -12,18 +12,20 @@ #include "util/http/HttpUtils.h" #include "util/http/UrlParser.h" +using namespace ad_utility::url_parser; +using namespace ad_utility::url_parser::sparqlOperation; + namespace { -auto ParsedRequest = - [](const std::string& path, - const ad_utility::HashMap& parameters, - const std::optional& query) - -> testing::Matcher { +auto ParsedRequestIs = [](const std::string& path, + 
const ParamValueMap& parameters, + const std::variant& operation) + -> testing::Matcher { return testing::AllOf( AD_FIELD(ad_utility::url_parser::ParsedRequest, path_, testing::Eq(path)), AD_FIELD(ad_utility::url_parser::ParsedRequest, parameters_, testing::ContainerEq(parameters)), - AD_FIELD(ad_utility::url_parser::ParsedRequest, query_, - testing::Eq(query))); + AD_FIELD(ad_utility::url_parser::ParsedRequest, operation_, + testing::Eq(operation))); }; } // namespace @@ -53,49 +55,120 @@ TEST(ServerTest, parseHttpRequest) { "application/x-www-form-urlencoded;charset=UTF-8"; const std::string QUERY = "application/sparql-query"; const std::string UPDATE = "application/sparql-update"; - EXPECT_THAT(parse(MakeGetRequest("/")), ParsedRequest("/", {}, std::nullopt)); + EXPECT_THAT(parse(MakeGetRequest("/")), ParsedRequestIs("/", {}, None{})); EXPECT_THAT(parse(MakeGetRequest("/ping")), - ParsedRequest("/ping", {}, std::nullopt)); + ParsedRequestIs("/ping", {}, None{})); EXPECT_THAT(parse(MakeGetRequest("/?cmd=stats")), - ParsedRequest("/", {{"cmd", "stats"}}, std::nullopt)); - EXPECT_THAT( - parse(MakeGetRequest( - "/?query=SELECT+%2A%20WHERE%20%7B%7D&action=csv_export")), - ParsedRequest("/", {{"action", "csv_export"}}, "SELECT * WHERE {}")); + ParsedRequestIs("/", {{"cmd", {"stats"}}}, None{})); + EXPECT_THAT(parse(MakeGetRequest( + "/?query=SELECT+%2A%20WHERE%20%7B%7D&action=csv_export")), + ParsedRequestIs("/", {{"action", {"csv_export"}}}, + Query{"SELECT * WHERE {}"})); EXPECT_THAT( parse(MakePostRequest("/", URLENCODED, "query=SELECT+%2A%20WHERE%20%7B%7D&send=100")), - ParsedRequest("/", {{"send", "100"}}, "SELECT * WHERE {}")); + ParsedRequestIs("/", {{"send", {"100"}}}, Query{"SELECT * WHERE {}"})); AD_EXPECT_THROW_WITH_MESSAGE( parse(MakePostRequest("/", URLENCODED, "ääär y=SELECT+%2A%20WHERE%20%7B%7D&send=100")), ::testing::HasSubstr("Invalid URL-encoded POST request")); + AD_EXPECT_THROW_WITH_MESSAGE( + 
parse(MakeGetRequest("/?query=SELECT%20%2A%20WHERE%20%7B%7D&query=SELECT%" + "20%3Ffoo%20WHERE%20%7B%7D")), + ::testing::StrEq( + "Parameter \"query\" must be given exactly once. Is: 2")); + AD_EXPECT_THROW_WITH_MESSAGE( + parse(MakePostRequest("/", URLENCODED, + "query=SELECT%20%2A%20WHERE%20%7B%7D&update=DELETE%" + "20%7B%7D%20WHERE%20%7B%7D")), + ::testing::HasSubstr( + "Request must only contain one of \"query\" and \"update\".")); + AD_EXPECT_THROW_WITH_MESSAGE( + parse(MakePostRequest("/", URLENCODED, + "update=DELETE%20%7B%7D%20WHERE%20%7B%7D&update=" + "DELETE%20%7B%7D%20WHERE%20%7B%7D")), + ::testing::StrEq( + "Parameter \"update\" must be given exactly once. Is: 2")); EXPECT_THAT( parse(MakePostRequest("/", "application/x-www-form-urlencoded", "query=SELECT%20%2A%20WHERE%20%7B%7D&send=100")), - ParsedRequest("/", {{"send", "100"}}, "SELECT * WHERE {}")); + ParsedRequestIs("/", {{"send", {"100"}}}, Query{"SELECT * WHERE {}"})); EXPECT_THAT(parse(MakePostRequest("/", URLENCODED, "query=SELECT%20%2A%20WHERE%20%7B%7D")), - ParsedRequest("/", {}, "SELECT * WHERE {}")); + ParsedRequestIs("/", {}, Query{"SELECT * WHERE {}"})); + EXPECT_THAT( + parse(MakePostRequest( + "/", URLENCODED, + "query=SELECT%20%2A%20WHERE%20%7B%7D&default-graph-uri=https%3A%2F%" + "2Fw3.org%2Fdefault&named-graph-uri=https%3A%2F%2Fw3.org%2F1&named-" + "graph-uri=https%3A%2F%2Fw3.org%2F2")), + ParsedRequestIs( + "/", + {{"default-graph-uri", {"https://w3.org/default"}}, + {"named-graph-uri", {"https://w3.org/1", "https://w3.org/2"}}}, + Query{"SELECT * WHERE {}"})); AD_EXPECT_THROW_WITH_MESSAGE( parse(MakePostRequest("/?send=100", URLENCODED, "query=SELECT%20%2A%20WHERE%20%7B%7D")), testing::StrEq("URL-encoded POST requests must not contain query " "parameters in the URL.")); EXPECT_THAT(parse(MakePostRequest("/", URLENCODED, "cmd=clear-cache")), - ParsedRequest("/", {{"cmd", "clear-cache"}}, std::nullopt)); + ParsedRequestIs("/", {{"cmd", {"clear-cache"}}}, None{})); 
EXPECT_THAT(parse(MakePostRequest("/", QUERY, "SELECT * WHERE {}")), - ParsedRequest("/", {}, "SELECT * WHERE {}")); - EXPECT_THAT(parse(MakePostRequest("/?send=100", QUERY, "SELECT * WHERE {}")), - ParsedRequest("/", {{"send", "100"}}, "SELECT * WHERE {}")); + ParsedRequestIs("/", {}, Query{"SELECT * WHERE {}"})); + EXPECT_THAT( + parse(MakePostRequest("/?send=100", QUERY, "SELECT * WHERE {}")), + ParsedRequestIs("/", {{"send", {"100"}}}, Query{"SELECT * WHERE {}"})); AD_EXPECT_THROW_WITH_MESSAGE( parse(MakeBasicRequest(http::verb::patch, "/")), testing::StrEq( "Request method \"PATCH\" not supported (has to be GET or POST)")); AD_EXPECT_THROW_WITH_MESSAGE( - parse(MakePostRequest("/", UPDATE, "DELETE * WHERE {}")), + parse(MakePostRequest("/", "invalid/content-type", "")), testing::StrEq( - "POST request with content type \"application/sparql-update\" not " - "supported (must be \"application/x-www-form-urlencoded\" or " - "\"application/sparql-query\")")); + "POST request with content type \"invalid/content-type\" not " + "supported (must be \"application/x-www-form-urlencoded\", " + "\"application/sparql-query\" or \"application/sparql-update\")")); + AD_EXPECT_THROW_WITH_MESSAGE( + parse(MakeGetRequest("/?update=DELETE%20%2A%20WHERE%20%7B%7D")), + testing::StrEq("SPARQL Update is not allowed as GET request.")); + EXPECT_THAT(parse(MakePostRequest("/", UPDATE, "DELETE * WHERE {}")), + ParsedRequestIs("/", {}, Update{"DELETE * WHERE {}"})); + EXPECT_THAT(parse(MakePostRequest("/", URLENCODED, + "update=DELETE%20%2A%20WHERE%20%7B%7D")), + ParsedRequestIs("/", {}, Update{"DELETE * WHERE {}"})); + EXPECT_THAT(parse(MakePostRequest("/", URLENCODED, + "update=DELETE+%2A+WHERE%20%7B%7D")), + ParsedRequestIs("/", {}, Update{"DELETE * WHERE {}"})); +} + +TEST(ServerTest, checkParameter) { + const ParamValueMap exampleParams = {{"foo", {"bar"}}, + {"baz", {"qux", "quux"}}}; + + EXPECT_THAT(Server::checkParameter(exampleParams, "doesNotExist", "", false), + 
testing::Eq(std::nullopt)); + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "baz", false), + testing::Eq(std::nullopt)); + AD_EXPECT_THROW_WITH_MESSAGE( + Server::checkParameter(exampleParams, "foo", "bar", false), + testing::StrEq("Access to \"foo=bar\" denied (requires a valid access " + "token), processing of request aborted")); + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "bar", true), + testing::Optional(testing::StrEq("bar"))); + AD_EXPECT_THROW_WITH_MESSAGE( + Server::checkParameter(exampleParams, "baz", "qux", false), + testing::StrEq("Parameter \"baz\" must be given exactly once. Is: 2")); + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", std::nullopt, true), + testing::Optional(testing::StrEq("bar"))); + AD_EXPECT_THROW_WITH_MESSAGE( + Server::checkParameter(exampleParams, "foo", std::nullopt, false), + testing::StrEq("Access to \"foo=bar\" denied (requires a valid access " + "token), processing of request aborted")); + AD_EXPECT_THROW_WITH_MESSAGE( + Server::checkParameter(exampleParams, "baz", std::nullopt, false), + testing::StrEq("Parameter \"baz\" must be given exactly once. Is: 2")); + AD_EXPECT_THROW_WITH_MESSAGE( + Server::checkParameter(exampleParams, "baz", std::nullopt, true), + testing::StrEq("Parameter \"baz\" must be given exactly once. 
Is: 2")); } diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 0c804e5d80..7280b96043 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -392,6 +392,7 @@ TEST_F(ServiceTest, computeResult) { // Check 5: When a siblingTree with variables common to the Service // Clause is passed, the Service Operation shall use the siblings result // to reduce its Query complexity by injecting them as Values Clause + auto iri = ad_utility::testing::iri; using TC = TripleComponent; auto siblingTree = std::make_shared( @@ -402,7 +403,11 @@ TEST_F(ServiceTest, computeResult) { {Variable{"?x"}, Variable{"?y"}, Variable{"?z"}}, {{TC(iri("")), TC(iri("")), TC(iri(""))}, {TC(iri("")), TC(iri("")), TC(iri(""))}, - {TC(iri("")), TC(iri("")), TC(iri(""))}}})); + {TC(iri("")), TC(iri("")), TC(iri(""))}, + // This row will be ignored in the created Values Clause as it + // contains a blank node. + {TC(Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0))), + TC(iri("")), TC(iri(""))}}})); auto parsedServiceClause5 = parsedServiceClause; parsedServiceClause5.graphPatternAsString_ = @@ -592,6 +597,10 @@ TEST_F(ServiceTest, bindingToTripleComponent) { TripleComponent::Literal::fromEscapedRdfLiteral("\"Hallo \\\\Welt\"", "@de")); + EXPECT_EQ(Service::bindingToTripleComponent( + {{"type", "literal"}, {"value", "a\"b\"c"}}), + TripleComponent::Literal::fromEscapedRdfLiteral("\"a\\\"b\\\"c\"")); + EXPECT_EQ(Service::bindingToTripleComponent( {{"type", "uri"}, {"value", "http://doof.org"}}), TripleComponent::Iri::fromIrirefWithoutBrackets("http://doof.org")); @@ -605,3 +614,35 @@ TEST_F(ServiceTest, bindingToTripleComponent) { {{"type", "INVALID_TYPE"}, {"value", "v"}}), ::testing::HasSubstr("Type INVALID_TYPE is undefined")); } + +// ____________________________________________________________________________ +TEST_F(ServiceTest, idToValueForValuesClause) { + auto idToVc = Service::idToValueForValuesClause; + LocalVocab localVocab{}; + auto index = 
ad_utility::testing::makeIndexWithTestSettings(); + + // blanknode -> nullopt + EXPECT_EQ(idToVc(index, Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0)), + localVocab), + std::nullopt); + + EXPECT_EQ(idToVc(index, Id::makeUndefined(), localVocab), "UNDEF"); + + // simple datatypes -> implicit string representation + EXPECT_EQ(idToVc(index, Id::makeFromInt(42), localVocab), "42"); + EXPECT_EQ(idToVc(index, Id::makeFromDouble(3.14), localVocab), "3.14"); + EXPECT_EQ(idToVc(index, Id::makeFromBool(true), localVocab), "true"); + + // Escape Quotes within literals. + auto str = LocalVocabEntry( + ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes( + "a\"b\"c")); + EXPECT_EQ(idToVc(index, Id::makeFromLocalVocabIndex(&str), localVocab), + "\"a\\\"b\\\"c\""); + + // value with xsd-type + EXPECT_EQ( + idToVc(index, Id::makeFromGeoPoint(GeoPoint(70.5, 130.2)), localVocab) + .value(), + absl::StrCat("\"POINT(130.200000 70.500000)\"^^<", GEO_WKT_LITERAL, ">")); +} diff --git a/test/UrlParserTest.cpp b/test/UrlParserTest.cpp index f3f87c2730..eb6aeebc4d 100644 --- a/test/UrlParserTest.cpp +++ b/test/UrlParserTest.cpp @@ -6,76 +6,125 @@ #include #include "util/GTestHelpers.h" +#include "util/TripleComponentTestHelpers.h" #include "util/http/HttpUtils.h" #include "util/http/UrlParser.h" using namespace ad_utility; +using namespace ad_utility::testing; + +TEST(UrlParserTest, getParameterCheckAtMostOnce) { + const url_parser::ParamValueMap map = {{"once", {"a"}}, + {"multiple_times", {"b", "c"}}}; + + EXPECT_THAT(url_parser::getParameterCheckAtMostOnce(map, "absent"), + ::testing::Eq(std::nullopt)); + EXPECT_THAT(url_parser::getParameterCheckAtMostOnce(map, "once"), + ::testing::Optional(::testing::StrEq("a"))); + AD_EXPECT_THROW_WITH_MESSAGE( + url_parser::getParameterCheckAtMostOnce(map, "multiple_times"), + ::testing::StrEq("Parameter \"multiple_times\" must be given " + "exactly once. 
Is: 2")); +} TEST(UrlParserTest, paramsToMap) { auto parseParams = - [](const string& queryString) -> HashMap { - boost::urls::url_view url(queryString); + [](const string& queryString) -> url_parser::ParamValueMap { + const boost::urls::url_view url(queryString); return url_parser::paramsToMap(url.params()); }; - auto HashMapEq = [](const HashMap& hashMap) - -> testing::Matcher> { - return testing::ContainerEq(hashMap); + auto HashMapEq = [](const url_parser::ParamValueMap& hashMap) + -> ::testing::Matcher { + return ::testing::ContainerEq(hashMap); }; - AD_EXPECT_THROW_WITH_MESSAGE( - parseParams("?foo=a&foo=b"), - testing::StrEq( - "HTTP parameter \"foo\" is set twice. It is \"a\" and \"b\"")); - // Parameter with key "" is set twice. - EXPECT_THROW(parseParams("?&"), std::runtime_error); - + EXPECT_THAT(parseParams("?foo=a&foo=b"), HashMapEq({{"foo", {"a", "b"}}})); + EXPECT_THAT(parseParams("?&"), HashMapEq({{"", {"", ""}}})); EXPECT_THAT(parseParams("?query=SELECT%20%2A%20WHERE%20%7B%7D"), - HashMapEq({{"query", "SELECT * WHERE {}"}})); - EXPECT_THAT(parseParams("?cmd=stats"), HashMapEq({{"cmd", "stats"}})); + HashMapEq({{"query", {"SELECT * WHERE {}"}}})); + EXPECT_THAT( + parseParams("?query=SELECT%20%2A%20WHERE%20%7B%7D&default-graph-uri=" + "https%3A%2F%2Fw3.org%2Fdefault&named-graph-uri=https%3A%2F%" + "2Fw3.org%2F1&named-graph-uri=https%3A%2F%2Fw3.org%2F2"), + HashMapEq( + {{"query", {"SELECT * WHERE {}"}}, + {"default-graph-uri", {"https://w3.org/default"}}, + {"named-graph-uri", {"https://w3.org/1", "https://w3.org/2"}}})); + EXPECT_THAT(parseParams("?cmd=stats"), HashMapEq({{"cmd", {"stats"}}})); EXPECT_THAT(parseParams("/ping"), HashMapEq({})); EXPECT_THAT(parseParams(""), HashMapEq({})); // Producing a sequence with one empty param here is a design decision by // Boost.URL to make it distinguishable from the case where there is no query // string. 
- EXPECT_THAT(parseParams("?"), HashMapEq({{"", ""}})); + EXPECT_THAT(parseParams("?"), HashMapEq({{"", {""}}})); EXPECT_THAT(parseParams("/ping?a=b&c=d"), - HashMapEq({{"a", "b"}, {"c", "d"}})); - EXPECT_THAT(parseParams("?foo=a"), HashMapEq({{"foo", "a"}})); + HashMapEq({{"a", {"b"}}, {"c", {"d"}}})); + EXPECT_THAT(parseParams("?foo=a"), HashMapEq({{"foo", {"a"}}})); EXPECT_THAT(parseParams("?a=b&c=d&e=f"), - HashMapEq({{"a", "b"}, {"c", "d"}, {"e", "f"}})); - EXPECT_THAT(parseParams("?=foo"), HashMapEq({{"", "foo"}})); - EXPECT_THAT(parseParams("?=foo&a=b"), HashMapEq({{"", "foo"}, {"a", "b"}})); - EXPECT_THAT(parseParams("?foo="), HashMapEq({{"foo", ""}})); + HashMapEq({{"a", {"b"}}, {"c", {"d"}}, {"e", {"f"}}})); + EXPECT_THAT(parseParams("?=foo"), HashMapEq({{"", {"foo"}}})); + EXPECT_THAT(parseParams("?=foo&a=b"), + HashMapEq({{"", {"foo"}}, {"a", {"b"}}})); + EXPECT_THAT(parseParams("?foo="), HashMapEq({{"foo", {""}}})); EXPECT_THAT(parseParams("?foo=&bar=baz"), - HashMapEq({{"foo", ""}, {"bar", "baz"}})); + HashMapEq({{"foo", {""}}, {"bar", {"baz"}}})); } TEST(UrlParserTest, parseRequestTarget) { using namespace ad_utility::url_parser; auto IsParsedUrl = [](const string& path, - const HashMap& parameters) - -> testing::Matcher { - return testing::AllOf( - AD_FIELD(ParsedUrl, path_, testing::Eq(path)), - AD_FIELD(ParsedUrl, parameters_, testing::ContainerEq(parameters))); + const url_parser::ParamValueMap& parameters) + -> ::testing::Matcher { + return ::testing::AllOf( + AD_FIELD(ParsedUrl, path_, ::testing::Eq(path)), + AD_FIELD(ParsedUrl, parameters_, ::testing::ContainerEq(parameters))); }; EXPECT_THAT(parseRequestTarget("/"), IsParsedUrl("/", {})); EXPECT_THAT(parseRequestTarget("/?cmd=stats"), - IsParsedUrl("/", {{"cmd", "stats"}})); + IsParsedUrl("/", {{"cmd", {"stats"}}})); EXPECT_THAT(parseRequestTarget("/?cmd=clear-cache"), - IsParsedUrl("/", {{"cmd", "clear-cache"}})); + IsParsedUrl("/", {{"cmd", {"clear-cache"}}})); 
EXPECT_THAT(parseRequestTarget( "/?query=SELECT%20%2A%20WHERE%20%7B%7D&action=csv_export"), - IsParsedUrl("/", {{"query", "SELECT * WHERE {}"}, - {"action", "csv_export"}})); + IsParsedUrl("/", {{"query", {"SELECT * WHERE {}"}}, + {"action", {"csv_export"}}})); EXPECT_THAT(parseRequestTarget("/ping?foo=bar"), - IsParsedUrl("/ping", {{"foo", "bar"}})); + IsParsedUrl("/ping", {{"foo", {"bar"}}})); EXPECT_THAT(parseRequestTarget("/foo??update=bar"), - IsParsedUrl("/foo", {{"?update", "bar"}})); + IsParsedUrl("/foo", {{"?update", {"bar"}}})); // This a complete URL and not only the request target AD_EXPECT_THROW_WITH_MESSAGE( parseRequestTarget("file://more-than-target"), - testing::StrEq("Failed to parse URL: \"file://more-than-target\".")); + ::testing::StrEq("Failed to parse URL: \"file://more-than-target\".")); +} + +TEST(UrlParserTest, parseDatasetClauses) { + using namespace ad_utility::url_parser; + + // Construct the vector from an initializer list without specifying the type. + auto IsDatasets = [](const std::vector& datasetClauses) + -> ::testing::Matcher&> { + return ::testing::ContainerEq(datasetClauses); + }; + + EXPECT_THAT(parseDatasetClauses({}), IsDatasets({})); + EXPECT_THAT( + parseDatasetClauses({{"default-graph-uri", {"https://w3.org/1"}}}), + IsDatasets({{iri(""), false}})); + EXPECT_THAT(parseDatasetClauses({{"named-graph-uri", {"https://w3.org/1"}}}), + IsDatasets({{iri(""), true}})); + EXPECT_THAT(parseDatasetClauses({{"default-graph-uri", {"https://w3.org/1"}}, + {"named-graph-uri", {"https://w3.org/2"}}}), + IsDatasets({{iri(""), false}, + {iri(""), true}})); + EXPECT_THAT( + parseDatasetClauses( + {{"default-graph-uri", {"https://w3.org/1", "https://w3.org/2"}}, + {"named-graph-uri", {"https://w3.org/3", "https://w3.org/4"}}}), + IsDatasets({{iri(""), false}, + {iri(""), false}, + {iri(""), true}, + {iri(""), true}})); }