-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a VALUES clause to the query of a SERVICE clause to simplify the execution #1341
Changes from 1 commit
b927302
4780aea
ec083e7
7d90398
28946b9
aeb638e
2afa979
94ad347
051bef2
4673248
fbb6e0b
50525b1
21b42be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,12 +5,14 @@ | |
#include "engine/Service.h" | ||
|
||
#include <absl/strings/str_cat.h> | ||
#include <absl/strings/str_join.h> | ||
#include <absl/strings/str_split.h> | ||
|
||
#include "engine/CallFixedSize.h" | ||
#include "engine/ExportQueryExecutionTrees.h" | ||
#include "engine/Values.h" | ||
#include "engine/VariableToColumnMap.h" | ||
#include "global/RuntimeParameters.h" | ||
#include "parser/TokenizerCtre.h" | ||
#include "parser/TurtleParser.h" | ||
#include "util/Exception.h" | ||
|
@@ -95,42 +97,11 @@ ResultTable Service::computeResult() { | |
serviceIriString.remove_suffix(1); | ||
ad_utility::httpUtils::Url serviceUrl{serviceIriString}; | ||
|
||
if (siblingTree_ != nullptr) { | ||
// Get the result of the siblingTree, to (potentially) | ||
// reduce complexity of the SERVICE query. | ||
auto siblingResult = siblingTree_->getResult(); | ||
|
||
const size_t rowLimit = 100; | ||
if (siblingResult->size() < rowLimit) { | ||
auto siblingVariables = siblingTree_->getVariableColumns(); | ||
|
||
// Build value clause for each common variable. | ||
std::string valueClauses = "{ "; | ||
for (const auto& lVar : parsedServiceClause_.visibleVariables_) { | ||
auto it = siblingVariables.find(lVar); | ||
if (it == siblingVariables.end()) { | ||
continue; | ||
} | ||
const auto& sVar = *it; | ||
|
||
valueClauses += "VALUES " + sVar.first.name() + " { "; | ||
for (size_t rowIndex = 0; rowIndex < siblingResult->size(); | ||
++rowIndex) { | ||
const auto& optionalString = | ||
ExportQueryExecutionTrees::idToStringAndType( | ||
siblingTree_->getRootOperation()->getIndex(), | ||
siblingResult->idTable()(rowIndex, sVar.second.columnIndex_), | ||
siblingResult->localVocab()); | ||
if (optionalString.has_value()) { | ||
valueClauses += optionalString.value().first + " "; | ||
} | ||
} | ||
valueClauses += "} . "; | ||
} | ||
|
||
parsedServiceClause_.graphPatternAsString_ = | ||
valueClauses + parsedServiceClause_.graphPatternAsString_.substr(2); | ||
} | ||
// Try to optimize the Service Clause using its sibling Operation. | |
if (auto valuesClause = getSiblingValuesClause(); valuesClause.has_value()) { | ||
parsedServiceClause_.graphPatternAsString_ = | ||
"{ " + valuesClause.value() + | ||
parsedServiceClause_.graphPatternAsString_.substr(2); | ||
} | ||
|
||
// Construct the query to be sent to the SPARQL endpoint. | ||
|
@@ -200,6 +171,57 @@ ResultTable Service::computeResult() { | |
return {std::move(idTable), resultSortedOn(), std::move(localVocab)}; | ||
} | ||
|
||
std::optional<std::string> Service::getSiblingValuesClause() const { | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (siblingTree_ == nullptr) { | ||
return std::nullopt; | ||
} | ||
|
||
auto siblingResult = siblingTree_->getResult(); | ||
if (siblingResult->size() > | ||
RuntimeParameters().get<"service-max-value-rows">()) { | ||
return std::nullopt; | ||
} | ||
|
||
std::vector<ColumnIndex> commonColumnIndices; | ||
auto siblingVariables = siblingTree_->getVariableColumns(); | ||
std::string vars = ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is one bug remaining: |
||
for (const auto& localVar : parsedServiceClause_.visibleVariables_) { | ||
auto it = siblingVariables.find(localVar); | ||
if (it == siblingVariables.end()) { | ||
continue; | ||
} | ||
vars += it->first.name() + " "; | ||
commonColumnIndices.push_back(it->second.columnIndex_); | ||
} | ||
vars.pop_back(); | ||
if (commonColumnIndices.size() > 1) { | ||
vars = "(" + vars + ")"; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about you ALWAYS add the parentheses? They also work for only one variable, and this makes the code simpler. |
||
|
||
std::string valuesClause = " { "; | ||
for (size_t rowIndex = 0; rowIndex < siblingResult->size(); ++rowIndex) { | ||
std::string row; | ||
for (size_t i = 0; i < commonColumnIndices.size(); ++i) { | ||
const auto& optionalString = ExportQueryExecutionTrees::idToStringAndType( | ||
siblingTree_->getRootOperation()->getIndex(), | ||
siblingResult->idTable()(rowIndex, commonColumnIndices[i]), | ||
siblingResult->localVocab()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Somehow the localVocab accessed here with There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll have a look at this. However, I don't suspect that the problem is the local vocab. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you send me a reproducer (Dataset + Query) where something doesn't work as expected? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, sorry, here is a reproducible example query; I use it with the Olympics dataset, but it doesn't use the local dataset anyway:
Expected result: 3 rows with the given There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I figured it out... |
||
if (optionalString.has_value()) { | ||
row += optionalString.value().first; | ||
if (i < commonColumnIndices.size() - 1) { | ||
row += " "; | ||
} | ||
} | ||
} | ||
if (commonColumnIndices.size() > 1) { | ||
row = "(" + row + ")"; | ||
} | ||
valuesClause += row + " "; | ||
} | ||
|
||
return "VALUES " + vars + valuesClause + "} . "; | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
template <size_t I> | ||
void Service::writeTsvResult(cppcoro::generator<std::string_view> tsvResult, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should probably find the first occurrence of the `{`; I am not sure if this breaks as soon as someone puts random spaces in their query.