-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a VALUES clause to the query of a SERVICE clause to simplify the execution #1341
Changes from 2 commits
b927302
4780aea
ec083e7
7d90398
28946b9
aeb638e
2afa979
94ad347
051bef2
4673248
fbb6e0b
50525b1
21b42be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,12 +5,14 @@ | |
#include "engine/Service.h" | ||
|
||
#include <absl/strings/str_cat.h> | ||
#include <absl/strings/str_join.h> | ||
#include <absl/strings/str_split.h> | ||
|
||
#include "engine/CallFixedSize.h" | ||
#include "engine/ExportQueryExecutionTrees.h" | ||
#include "engine/Values.h" | ||
#include "engine/VariableToColumnMap.h" | ||
#include "global/RuntimeParameters.h" | ||
#include "parser/TokenizerCtre.h" | ||
#include "parser/TurtleParser.h" | ||
#include "util/Exception.h" | ||
|
@@ -95,42 +97,11 @@ ResultTable Service::computeResult() { | |
serviceIriString.remove_suffix(1); | ||
ad_utility::httpUtils::Url serviceUrl{serviceIriString}; | ||
|
||
if (siblingTree_ != nullptr) { | ||
// Get the result of the siblingTree, to (potentially) | ||
// reduce complexity of the SERVICE query. | ||
auto siblingResult = siblingTree_->getResult(); | ||
|
||
const size_t rowLimit = 100; | ||
if (siblingResult->size() < rowLimit) { | ||
auto siblingVariables = siblingTree_->getVariableColumns(); | ||
|
||
// Build value clause for each common variable. | ||
std::string valueClauses = "{ "; | ||
for (const auto& lVar : parsedServiceClause_.visibleVariables_) { | ||
auto it = siblingVariables.find(lVar); | ||
if (it == siblingVariables.end()) { | ||
continue; | ||
} | ||
const auto& sVar = *it; | ||
|
||
valueClauses += "VALUES " + sVar.first.name() + " { "; | ||
for (size_t rowIndex = 0; rowIndex < siblingResult->size(); | ||
++rowIndex) { | ||
const auto& optionalString = | ||
ExportQueryExecutionTrees::idToStringAndType( | ||
siblingTree_->getRootOperation()->getIndex(), | ||
siblingResult->idTable()(rowIndex, sVar.second.columnIndex_), | ||
siblingResult->localVocab()); | ||
if (optionalString.has_value()) { | ||
valueClauses += optionalString.value().first + " "; | ||
} | ||
} | ||
valueClauses += "} . "; | ||
} | ||
|
||
parsedServiceClause_.graphPatternAsString_ = | ||
valueClauses + parsedServiceClause_.graphPatternAsString_.substr(2); | ||
} | ||
// Try to optimize the Service Clause using its sibling Operation. | ||
if (auto valuesClause = getSiblingValuesClause(); valuesClause.has_value()) { | ||
parsedServiceClause_.graphPatternAsString_ = | ||
"{ " + valuesClause.value() + | ||
parsedServiceClause_.graphPatternAsString_.substr(2); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You should probably find the first occurrence of the |
||
} | ||
|
||
// Construct the query to be sent to the SPARQL endpoint. | ||
|
@@ -200,6 +171,58 @@ ResultTable Service::computeResult() { | |
return {std::move(idTable), resultSortedOn(), std::move(localVocab)}; | ||
} | ||
|
||
// Build a SPARQL `VALUES` clause from the result of `siblingTree_`, restricted
// to the variables that appear both in
// `parsedServiceClause_.visibleVariables_` and in the variable columns of the
// sibling tree. Returns `std::nullopt` if there is no sibling tree or if the
// sibling result has more rows than the `service-max-value-rows` runtime
// parameter. Otherwise the returned string has the form
// `VALUES <vars> { <row> <row> ... } . `.
std::optional<std::string> Service::getSiblingValuesClause() const { | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (siblingTree_ == nullptr) { | ||
return std::nullopt; | ||
} | ||
|
||
const auto& siblingResult = siblingTree_->getResult(); | ||
if (siblingResult->size() > | ||
RuntimeParameters().get<"service-max-value-rows">()) { | ||
return std::nullopt; | ||
} | ||
|
||
std::vector<ColumnIndex> commonColumnIndices; | ||
const auto& siblingVars = siblingTree_->getVariableColumns(); | ||
std::string vars = ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is one bug remaining: |
||
// Collect the names (space-separated) and the sibling column indices of all
// variables that are visible in the SERVICE clause and also present in the
// sibling tree.
for (const auto& localVar : parsedServiceClause_.visibleVariables_) { | ||
auto it = siblingVars.find(localVar); | ||
if (it == siblingVars.end()) { | ||
continue; | ||
} | ||
vars += it->first.name() + " "; | ||
commonColumnIndices.push_back(it->second.columnIndex_); | ||
} | ||
// Drop the trailing space that was appended after the last variable name.
// NOTE(review): if no variable is shared with the sibling, `vars` is still
// empty here, and `pop_back()` on an empty `std::string` is undefined
// behavior. Consider returning `std::nullopt` in that case.
vars.pop_back(); | ||
// More than one variable is written as a parenthesized list.
if (commonColumnIndices.size() > 1) { | ||
vars = "(" + vars + ")"; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about you ALWAYS add the parentheses, they also work for only one variable, and this makes the code simpler. |
||
|
||
// Emit one entry per row of the sibling result, using only the common columns.
std::string values = " { "; | ||
for (size_t rowIndex = 0; rowIndex < siblingResult->size(); ++rowIndex) { | ||
std::string row; | ||
for (size_t i = 0; i < commonColumnIndices.size(); ++i) { | ||
// NOTE(review): ids for which `idToStringAndType` yields no value are
// skipped entirely (including the separator), so such a row ends up with
// fewer entries than there are variables. TODO confirm this is intended.
const auto& optionalString = ExportQueryExecutionTrees::idToStringAndType( | ||
siblingTree_->getRootOperation()->getIndex(), | ||
siblingResult->idTable()(rowIndex, commonColumnIndices[i]), | ||
siblingResult->localVocab()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Somehow the localVocab accessed here with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll have a look at this. I however don't suspect that the problem is the local vocab. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you send me a reproducer (Dataset + Query) where something doesn't work as expected? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok sry, here is a reproducable example query, i use it with Olympics Dataset but it doesn't use the local dataset anyway:
Expected result: 3 rows with the given There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I figured it out.... |
||
|
||
if (optionalString.has_value()) { | ||
row += optionalString.value().first; | ||
if (i < commonColumnIndices.size() - 1) { | ||
row += " "; | ||
} | ||
} | ||
} | ||
if (commonColumnIndices.size() > 1) { | ||
row = "(" + row + ")"; | ||
} | ||
values += row + " "; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For all your |
||
} | ||
|
||
return "VALUES " + vars + values + "} . "; | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
template <size_t I> | ||
void Service::writeTsvResult(cppcoro::generator<std::string_view> tsvResult, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,10 @@ class Service : public Operation { | |
GetTsvFunction getTsvFunction = sendHttpOrHttpsRequest, | ||
std::shared_ptr<QueryExecutionTree> siblingTree = nullptr); | ||
|
||
// Set the sibling tree. Its result is used by `getSiblingValuesClause()` to
// build a `VALUES` clause for the SERVICE query.
inline void setSiblingTree(std::shared_ptr<QueryExecutionTree> siblingTree) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The inline doesn't do anything here. |
||
siblingTree_ = siblingTree; | ||
} | ||
|
||
// Methods inherited from base class `Operation`. | ||
std::string getDescriptor() const override; | ||
size_t getResultWidth() const override; | ||
|
@@ -86,6 +90,8 @@ class Service : public Operation { | |
// Compute the result using `getTsvFunction_`. | ||
ResultTable computeResult() override; | ||
|
||
// Return a SPARQL `VALUES` clause built from the result of the sibling tree,
// or `std::nullopt` if there is no sibling tree or its result has too many
// rows.
std::optional<std::string> getSiblingValuesClause() const; | ||
joka921 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Write the given TSV result to the given result object. The `I` is the width | ||
// of the result table. | ||
// | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The other one is very fishy (expects that you don't get passed the same Service twice).