Skip to content

Commit

Permalink
Fix code smells and add missing tests in the SPARQL parser (#909)
Browse files Browse the repository at this point in the history
Concerning tests, the parsing of IRI function calls like `geof:latitude` was not tested so far, now it is.
  • Loading branch information
joka921 authored Mar 11, 2023
1 parent eea09ba commit 7539be6
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 75 deletions.
87 changes: 45 additions & 42 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ using Parser = SparqlAutomaticParser;

// _____________________________________________________________________________
std::string Visitor::getOriginalInputForContext(
antlr4::ParserRuleContext* context) {
const antlr4::ParserRuleContext* context) {
const auto& fullInput = context->getStart()->getInputStream()->toString();
size_t posBeg = context->getStart()->getStartIndex();
size_t posEnd = context->getStop()->getStopIndex();
Expand Down Expand Up @@ -72,8 +72,7 @@ ExpressionPtr Visitor::processIriFunctionCall(

constexpr static std::string_view geofPrefix =
"<http://www.opengis.net/def/function/geosparql/";
std::string_view iriView = iri;
if (iriView.starts_with(geofPrefix)) {
if (std::string_view iriView = iri; iriView.starts_with(geofPrefix)) {
iriView.remove_prefix(geofPrefix.size());
AD_CONTRACT_CHECK(iriView.ends_with('>'));
iriView.remove_suffix(1);
Expand All @@ -94,10 +93,6 @@ ExpressionPtr Visitor::processIriFunctionCall(
reportNotSupported(ctx, "Function \"" + iri + "\" is");
}

void Visitor::addVisibleVariable(string var) {
addVisibleVariable(Variable{std::move(var)});
}

void Visitor::addVisibleVariable(Variable var) {
visibleVariables_.emplace_back(std::move(var));
}
Expand All @@ -116,17 +111,15 @@ string stripAndLowercaseKeywordLiteral(std::string_view lit) {
template <typename Current, typename... Others>
constexpr const ad_utility::Last<Current, Others...>* unwrapVariant(
const auto& arg) {
if constexpr (sizeof...(Others) > 0) {
if constexpr (ad_utility::isSimilar<decltype(arg), Current>) {
if (const auto ptr = std::get_if<ad_utility::First<Others...>>(&arg)) {
return unwrapVariant<Others...>(*ptr);
}
return nullptr;
} else {
return unwrapVariant<Others...>(arg);
if constexpr (sizeof...(Others) == 0) {
return &arg;
} else if constexpr (ad_utility::isSimilar<decltype(arg), Current>) {
if (const auto ptr = std::get_if<ad_utility::First<Others...>>(&arg)) {
return unwrapVariant<Others...>(*ptr);
}
return nullptr;
} else {
return &arg;
return unwrapVariant<Others...>(arg);
}
}
} // namespace
Expand All @@ -137,7 +130,7 @@ PathTuples joinPredicateAndObject(VarOrPath predicate, ObjectList objectList) {
for (auto& object : objectList.first) {
// TODO The fulltext index should perform the splitting of its keywords,
// and not the SparqlParser.
if (PropertyPath* path = std::get_if<PropertyPath>(&predicate)) {
if (const PropertyPath* path = std::get_if<PropertyPath>(&predicate)) {
if (path->asString() == CONTAINS_WORD_PREDICATE) {
if (const Literal* literal =
unwrapVariant<VarOrTerm, GraphTerm, Literal>(object)) {
Expand Down Expand Up @@ -379,8 +372,8 @@ Visitor::OperationsAndFilters Visitor::visit(
if (ctx->triplesBlock()) {
ops.emplace_back(visit(ctx->triplesBlock()));
}
auto others = visitVector(ctx->graphPatternNotTriplesAndMaybeTriples());
for (auto& [graphPattern, triples] : others) {
for (auto& [graphPattern, triples] :
visitVector(ctx->graphPatternNotTriplesAndMaybeTriples())) {
std::visit(ad_utility::OverloadCallOperator{filter, op},
std::move(graphPattern));

Expand All @@ -405,11 +398,13 @@ Visitor::OperationOrFilterAndMaybeTriples Visitor::visit(

// ____________________________________________________________________________________
BasicGraphPattern Visitor::visit(Parser::TriplesBlockContext* ctx) {
auto iri = [](const Iri& iri) -> TripleComponent { return iri.toSparql(); };
auto blankNode = [](const BlankNode& blankNode) -> TripleComponent {
auto visitIri = [](const Iri& iri) -> TripleComponent {
return iri.toSparql();
};
auto visitBlankNode = [](const BlankNode& blankNode) -> TripleComponent {
return blankNode.toSparql();
};
auto literal = [](const Literal& literal) {
auto visitLiteral = [](const Literal& literal) {
// Problem: ql:contains-word causes the " to be stripped.
// TODO: Move stripAndLowercaseKeywordLiteral out to this point or
// rewrite the Turtle Parser s.t. this code can be integrated into the
Expand All @@ -422,25 +417,32 @@ BasicGraphPattern Visitor::visit(Parser::TriplesBlockContext* ctx) {
return TripleComponent{literal.toSparql()};
}
};
auto graphTerm = [&iri, &blankNode, &literal](const GraphTerm& term) {
return term.visit(
ad_utility::OverloadCallOperator{iri, blankNode, literal});
auto visitGraphTerm = [&visitIri, &visitBlankNode,
&visitLiteral](const GraphTerm& graphTerm) {
return graphTerm.visit(ad_utility::OverloadCallOperator{
visitIri, visitBlankNode, visitLiteral});
};
auto varTriple = [](const Variable& var) { return TripleComponent{var}; };
auto varOrTerm = [&varTriple, &graphTerm](VarOrTerm varOrTerm) {
auto varToTripleComponent = [](const Variable& var) {
return TripleComponent{var};
};
auto visitVarOrTerm = [&varToTripleComponent,
&visitGraphTerm](const VarOrTerm& varOrTerm) {
return varOrTerm.visit(
ad_utility::OverloadCallOperator{varTriple, graphTerm});
ad_utility::OverloadCallOperator{varToTripleComponent, visitGraphTerm});
};

auto varPath = [](const Variable& var) {
auto varToPropertyPath = [](const Variable& var) {
return PropertyPath::fromVariable(var);
};
auto path = [](const PropertyPath& path) { return path; };
auto varOrPath = [&varPath,
&path](ad_utility::sparql_types::VarOrPath varOrPath) {
return std::visit(ad_utility::OverloadCallOperator{varPath, path},
std::move(varOrPath));
};
auto propertyPathIdentity = [](const PropertyPath& path) { return path; };
auto visitVarOrPath =
[&varToPropertyPath, &propertyPathIdentity](
const ad_utility::sparql_types::VarOrPath& varOrPath) {
return std::visit(
ad_utility::OverloadCallOperator{varToPropertyPath,
propertyPathIdentity},
varOrPath);
};

auto registerIfVariable = [this](const auto& variant) {
if (holds_alternative<Variable>(variant)) {
Expand All @@ -449,15 +451,14 @@ BasicGraphPattern Visitor::visit(Parser::TriplesBlockContext* ctx) {
};

auto convertAndRegisterTriple =
[&varOrTerm, &varOrPath,
&registerIfVariable](TripleWithPropertyPath&& triple) -> SparqlTriple {
[&visitVarOrTerm, &visitVarOrPath, &registerIfVariable](
const TripleWithPropertyPath& triple) -> SparqlTriple {
registerIfVariable(triple.subject_);
registerIfVariable(triple.predicate_);
registerIfVariable(triple.object_);

return {varOrTerm(std::move(triple.subject_)),
varOrPath(std::move(triple.predicate_)),
varOrTerm(std::move(triple.object_))};
return {visitVarOrTerm(triple.subject_), visitVarOrPath(triple.predicate_),
visitVarOrTerm(triple.object_)};
};

BasicGraphPattern triples = {ad_utility::transform(
Expand Down Expand Up @@ -515,8 +516,10 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) {
// when computing the result for this operation.
std::vector<Variable> visibleVariablesSoFar = std::move(visibleVariables_);
parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern());
// Note: The `visit` call in the line above has filled the `visibleVariables_`
// member with all the variables visible inside the graph pattern.
std::vector<Variable> visibleVariablesServiceQuery =
ad_utility::removeDuplicates(std::move(visibleVariables_));
ad_utility::removeDuplicates(visibleVariables_);
visibleVariables_ = std::move(visibleVariablesSoFar);
visibleVariables_.insert(visibleVariables_.end(),
visibleVariablesServiceQuery.begin(),
Expand Down Expand Up @@ -1611,7 +1614,7 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) {
// the number of arguments like for `processIriFunctionCall`, since the number
// of arguments is fixed by the grammar and we wouldn't even get here if the
// number were wrong. Hence only the `AD_CONTRACT_CHECK`s.
AD_CONTRACT_CHECK(ctx->children.size() >= 1);
AD_CONTRACT_CHECK(!ctx->children.empty());
auto functionName = ad_utility::getLowercase(ctx->children[0]->getText());
auto argList = visitVector(ctx->expression());
using namespace sparqlExpression;
Expand Down
68 changes: 35 additions & 33 deletions src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class SparqlQleverVisitor {
void visit(Parser::PrologueContext* ctx);

// ___________________________________________________________________________
[[noreturn]] void visit(Parser::BaseDeclContext* ctx);
[[noreturn]] static void visit(Parser::BaseDeclContext* ctx);

// ___________________________________________________________________________
void visit(Parser::PrefixDeclContext* ctx);
Expand All @@ -135,17 +135,17 @@ class SparqlQleverVisitor {
// will always throw an exception because the corresponding feature is not
// (yet) supported by QLever. If they have return types other than void this
// is to make the usage of abstractions like `visitAlternative` easier.
[[noreturn]] ParsedQuery visit(Parser::DescribeQueryContext* ctx);
[[noreturn]] static ParsedQuery visit(Parser::DescribeQueryContext* ctx);

[[noreturn]] ParsedQuery visit(Parser::AskQueryContext* ctx);
[[noreturn]] static ParsedQuery visit(Parser::AskQueryContext* ctx);

[[noreturn]] void visit(Parser::DatasetClauseContext* ctx);
[[noreturn]] static void visit(Parser::DatasetClauseContext* ctx);

[[noreturn]] void visit(Parser::DefaultGraphClauseContext* ctx);
[[noreturn]] static void visit(Parser::DefaultGraphClauseContext* ctx);

[[noreturn]] void visit(Parser::NamedGraphClauseContext* ctx);
[[noreturn]] static void visit(Parser::NamedGraphClauseContext* ctx);

[[noreturn]] void visit(Parser::SourceSelectorContext* ctx);
[[noreturn]] static void visit(Parser::SourceSelectorContext* ctx);

[[nodiscard]] PatternAndVisibleVariables visit(
Parser::WhereClauseContext* ctx);
Expand All @@ -166,11 +166,11 @@ class SparqlQleverVisitor {

[[nodiscard]] LimitOffsetClause visit(Parser::LimitOffsetClausesContext* ctx);

[[nodiscard]] uint64_t visit(Parser::LimitClauseContext* ctx);
[[nodiscard]] static uint64_t visit(Parser::LimitClauseContext* ctx);

[[nodiscard]] uint64_t visit(Parser::OffsetClauseContext* ctx);
[[nodiscard]] static uint64_t visit(Parser::OffsetClauseContext* ctx);

[[nodiscard]] uint64_t visit(Parser::TextLimitClauseContext* ctx);
[[nodiscard]] static uint64_t visit(Parser::TextLimitClauseContext* ctx);

[[nodiscard]] std::optional<parsedQuery::Values> visit(
Parser::ValuesClauseContext* ctx);
Expand All @@ -187,7 +187,7 @@ class SparqlQleverVisitor {
Parser::GraphPatternNotTriplesAndMaybeTriplesContext* ctx);

[[nodiscard]] parsedQuery::BasicGraphPattern visit(
Parser::TriplesBlockContext* ctx);
Parser::TriplesBlockContext* graphTerm);

// Filter clauses are no independent graph patterns themselves, but their
// scope is always the complete graph pattern enclosing them.
Expand All @@ -197,7 +197,7 @@ class SparqlQleverVisitor {
[[nodiscard]] parsedQuery::GraphPatternOperation visit(
Parser::OptionalGraphPatternContext* ctx);

[[noreturn]] parsedQuery::GraphPatternOperation visit(
[[noreturn]] static parsedQuery::GraphPatternOperation visit(
Parser::GraphGraphPatternContext* ctx);

[[nodiscard]] parsedQuery::Service visit(
Expand Down Expand Up @@ -236,7 +236,7 @@ class SparqlQleverVisitor {

[[nodiscard]] vector<ExpressionPtr> visit(Parser::ArgListContext* ctx);

[[noreturn]] void visit(Parser::ExpressionListContext* ctx);
[[noreturn]] static void visit(Parser::ExpressionListContext* ctx);

[[nodiscard]] std::optional<parsedQuery::ConstructClause> visit(
Parser::ConstructTemplateContext* ctx);
Expand Down Expand Up @@ -265,7 +265,7 @@ class SparqlQleverVisitor {

[[nodiscard]] PropertyPath visit(Parser::VerbPathContext* ctx);

[[nodiscard]] Variable visit(Parser::VerbSimpleContext* ctx);
[[nodiscard]] static Variable visit(Parser::VerbSimpleContext* ctx);

[[nodiscard]] PathTuples visit(Parser::TupleWithoutPathContext* ctx);

Expand All @@ -288,17 +288,19 @@ class SparqlQleverVisitor {

[[nodiscard]] PropertyPath visit(Parser::PathEltOrInverseContext* ctx);

[[noreturn]] void visit(Parser::PathModContext* ctx);
[[noreturn]] static void visit(Parser::PathModContext* ctx);

[[nodiscard]] PropertyPath visit(Parser::PathPrimaryContext* ctx);

[[noreturn]] PropertyPath visit(Parser::PathNegatedPropertySetContext*);
[[noreturn]] static PropertyPath visit(
Parser::PathNegatedPropertySetContext*);

[[noreturn]] PropertyPath visit(Parser::PathOneInPropertySetContext* ctx);
[[noreturn]] static PropertyPath visit(
Parser::PathOneInPropertySetContext* ctx);

/// Note that in the SPARQL grammar the INTEGER rule refers to positive
/// integers without an explicit sign.
[[nodiscard]] uint64_t visit(Parser::IntegerContext* ctx);
[[nodiscard]] static uint64_t visit(Parser::IntegerContext* ctx);

[[nodiscard]] Node visit(Parser::TriplesNodeContext* ctx);

Expand All @@ -320,7 +322,7 @@ class SparqlQleverVisitor {

[[nodiscard]] VarOrTerm visit(Parser::VarOrIriContext* ctx);

[[nodiscard]] Variable visit(Parser::VarContext* ctx);
[[nodiscard]] static Variable visit(Parser::VarContext* ctx);

[[nodiscard]] GraphTerm visit(Parser::GraphTermContext* ctx);

Expand Down Expand Up @@ -384,13 +386,13 @@ class SparqlQleverVisitor {

[[nodiscard]] ExpressionPtr visit(Parser::LangExpressionContext* ctx);

[[noreturn]] void visit(Parser::SubstringExpressionContext* ctx);
[[noreturn]] static void visit(Parser::SubstringExpressionContext* ctx);

[[noreturn]] void visit(Parser::StrReplaceExpressionContext* ctx);
[[noreturn]] static void visit(Parser::StrReplaceExpressionContext* ctx);

[[noreturn]] void visit(Parser::ExistsFuncContext* ctx);
[[noreturn]] static void visit(Parser::ExistsFuncContext* ctx);

[[noreturn]] void visit(Parser::NotExistsFuncContext* ctx);
[[noreturn]] static void visit(Parser::NotExistsFuncContext* ctx);

[[nodiscard]] ExpressionPtr visit(Parser::AggregateContext* ctx);

Expand All @@ -400,19 +402,22 @@ class SparqlQleverVisitor {

[[nodiscard]] IntOrDouble visit(Parser::NumericLiteralContext* ctx);

[[nodiscard]] IntOrDouble visit(Parser::NumericLiteralUnsignedContext* ctx);
[[nodiscard]] static IntOrDouble visit(
Parser::NumericLiteralUnsignedContext* ctx);

[[nodiscard]] IntOrDouble visit(Parser::NumericLiteralPositiveContext* ctx);
[[nodiscard]] static IntOrDouble visit(
Parser::NumericLiteralPositiveContext* ctx);

[[nodiscard]] IntOrDouble visit(Parser::NumericLiteralNegativeContext* ctx);
[[nodiscard]] static IntOrDouble visit(
Parser::NumericLiteralNegativeContext* ctx);

[[nodiscard]] bool visit(Parser::BooleanLiteralContext* ctx);
[[nodiscard]] static bool visit(Parser::BooleanLiteralContext* ctx);

[[nodiscard]] string visit(Parser::StringContext* ctx);
[[nodiscard]] static string visit(Parser::StringContext* ctx);

[[nodiscard]] string visit(Parser::IriContext* ctx);

[[nodiscard]] string visit(Parser::IrirefContext* ctx);
[[nodiscard]] static string visit(Parser::IrirefContext* ctx);

[[nodiscard]] string visit(Parser::PrefixedNameContext* ctx);

Expand Down Expand Up @@ -442,17 +447,14 @@ class SparqlQleverVisitor {
// using such parts for further processing (like the body of a SERVICE query,
// which is not valid SPARQL anymore when you remove all whitespace).
static std::string getOriginalInputForContext(
antlr4::ParserRuleContext* context);
const antlr4::ParserRuleContext* context);

// Process an IRI function call. This is used in both `visitFunctionCall` and
// `visitIriOrFunction`.
[[nodiscard]] ExpressionPtr processIriFunctionCall(
const std::string& iri, std::vector<ExpressionPtr> argList,
antlr4::ParserRuleContext*);

// TODO: Remove addVisibleVariable(const string&) when all Types use the
// strong type `Variable`.
void addVisibleVariable(string var);
void addVisibleVariable(Variable var);

[[noreturn]] void throwCollectionsAndBlankNodePathsNotSupported(auto* ctx) {
Expand Down
26 changes: 26 additions & 0 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,32 @@ TEST(SparqlParser, GroupCondition) {
"<http://www.opengis.net/def/function/geosparql/latitude>(?test)"));
}

TEST(SparqlParser, FunctionCall) {
auto expectFunctionCall = ExpectCompleteParse<&Parser::functionCall>{};
auto expectFunctionCallFails = ExpectParseFails<&Parser::functionCall>{};

// Correct function calls. Check that the parser picks the correct expression.
expectFunctionCall(
"<http://www.opengis.net/def/function/geosparql/latitude>(?a)",
m::ExpressionWithType<sparqlExpression::LatitudeExpression>());
expectFunctionCall(
"<http://www.opengis.net/def/function/geosparql/longitude>(?a)",
m::ExpressionWithType<sparqlExpression::LongitudeExpression>());
expectFunctionCall(
"<http://www.opengis.net/def/function/geosparql/distance>(?a, ?b)",
m::ExpressionWithType<sparqlExpression::DistExpression>());

// Wrong number of arguments.
expectFunctionCallFails(
"<http://www.opengis.net/def/function/geosparql/distance>(?a)");
// Unknown function with the `geof:` prefix.
expectFunctionCallFails(
"<http://www.opengis.net/def/function/geosparql/notExisting>()");
// Prefix for which no function is known.
expectFunctionCallFails(
"<http://www.no-existing-prefixes.com/notExisting>()");
}

TEST(SparqlParser, GroupClause) {
expectCompleteParse(
parse<&Parser::groupClause>(
Expand Down
Loading

0 comments on commit 7539be6

Please sign in to comment.