From 80667bdaf407933deec9f3022d3073ea90b2038a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 20 Apr 2024 00:27:08 +0200 Subject: [PATCH 001/133] Rename ResultTable -> Result --- benchmark/Usage.md | 111 ++++++++++++------ src/engine/Bind.cpp | 6 +- src/engine/Bind.h | 4 +- src/engine/CMakeLists.txt | 2 +- src/engine/CartesianProductJoin.cpp | 6 +- src/engine/CartesianProductJoin.h | 2 +- src/engine/CountAvailablePredicates.cpp | 4 +- src/engine/CountAvailablePredicates.h | 6 +- src/engine/Distinct.cpp | 4 +- src/engine/Distinct.h | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 22 ++-- src/engine/ExportQueryExecutionTrees.h | 12 +- src/engine/Filter.cpp | 6 +- src/engine/Filter.h | 4 +- src/engine/GroupBy.cpp | 8 +- src/engine/GroupBy.h | 2 +- src/engine/HasPredicateScan.cpp | 4 +- src/engine/HasPredicateScan.h | 4 +- src/engine/IndexScan.cpp | 2 +- src/engine/IndexScan.h | 2 +- src/engine/Join.cpp | 8 +- src/engine/Join.h | 2 +- src/engine/Minus.cpp | 4 +- src/engine/Minus.h | 2 +- src/engine/MultiColumnJoin.cpp | 4 +- src/engine/MultiColumnJoin.h | 2 +- src/engine/NeutralElementOperation.h | 2 +- src/engine/Operation.cpp | 8 +- src/engine/Operation.h | 14 +-- src/engine/OptionalJoin.cpp | 4 +- src/engine/OptionalJoin.h | 2 +- src/engine/OrderBy.cpp | 4 +- src/engine/OrderBy.h | 2 +- src/engine/QueryExecutionContext.h | 13 +- src/engine/QueryExecutionTree.h | 4 +- src/engine/QueryPlanner.h | 2 +- src/engine/{ResultTable.cpp => Result.cpp} | 31 +++-- src/engine/{ResultTable.h => Result.h} | 45 +++---- src/engine/ResultType.h | 4 +- src/engine/Service.cpp | 2 +- src/engine/Service.h | 2 +- src/engine/Sort.cpp | 4 +- src/engine/Sort.h | 2 +- src/engine/TextIndexScanForEntity.cpp | 2 +- src/engine/TextIndexScanForEntity.h | 2 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/engine/TextIndexScanForWord.h | 4 +- src/engine/TransitivePathImpl.h | 10 +- src/engine/Union.cpp | 10 +- src/engine/Union.h | 2 +- 
src/engine/Values.cpp | 2 +- src/engine/Values.h | 2 +- .../SparqlExpressionValueGetters.h | 2 +- src/index/IndexImpl.h | 2 +- src/parser/data/ConstructQueryExportContext.h | 4 +- src/parser/data/Variable.cpp | 2 +- test/BenchmarkMeasurementContainerTest.cpp | 6 +- test/GroupByTest.cpp | 2 +- test/LocalVocabTest.cpp | 4 +- test/ServiceTest.cpp | 2 +- test/SparqlDataTypesTest.cpp | 4 +- test/ValuesTest.cpp | 2 +- test/engine/TextIndexScanTestHelpers.h | 6 +- test/engine/ValuesForTesting.h | 4 +- test/util/OperationTestHelpers.h | 4 +- 65 files changed, 240 insertions(+), 220 deletions(-) rename src/engine/{ResultTable.cpp => Result.cpp} (80%) rename src/engine/{ResultTable.h => Result.h} (83%) diff --git a/benchmark/Usage.md b/benchmark/Usage.md index 5e621b776c..6846e47d03 100644 --- a/benchmark/Usage.md +++ b/benchmark/Usage.md @@ -5,27 +5,35 @@ A quick introduction and tutorial for the macro benchmark infrastructure. As of July 2023 the benchmark infrastructure has the following features: - Measuring the execution time of a function in seconds. -- Organizing a benchmark as a single measurement, as a table of measurements, or as a group of single measurements and tables. +- Organizing a benchmark as a single measurement, as a table of measurements, or as a group of single measurements and + tables. - Printing the measured benchmarks and/or exporting them as a JSON formatted file. - Adding metadata information to benchmarks. -- Passing values at runtime via pre-defined configuration options, which can be set either using a JSON file, or per shorthand in the CLI. +- Passing values at runtime via pre-defined configuration options, which can be set either using a JSON file, or per + shorthand in the CLI. -However, support for the prevention of compiler optimization in benchmarks is not available and still in the planning stage. This can sabotage measured execution times and should be kept in mind, while writing benchmarks. 
-For example: An expression without a return type and without side effects will get optimized out. Like, for example, when you are trying to measure a `getter` function without using the returned value. +However, support for the prevention of compiler optimization in benchmarks is not available and still in the planning +stage. This can sabotage measured execution times and should be kept in mind, while writing benchmarks. +For example: An expression without a return type and without side effects will get optimized out. Like, for example, +when you are trying to measure a `getter` function without using the returned value. # How to write a basic benchmark -This will be a rather undetailed tutorial, because all the functions and classes have their own documentation, which I do not want to repeat. +This will be a rather undetailed tutorial, because all the functions and classes have their own documentation, which I +do not want to repeat. For a quick hands-on example of the general usage and all features, see `benchmark/BenchmarkExamples.cpp`. -Larger collections of benchmarks, or even a single one, are organized into classes, that inherit from `BenchmarkInterface` in `benchmark/infrastructure/Benchmark.h`. +Larger collections of benchmarks, or even a single one, are organized into classes, that inherit +from `BenchmarkInterface` in `benchmark/infrastructure/Benchmark.h`. Those class implementations should have their own `.cpp` file in the folder `benchmark`. ## Writing the class -To write your own class, first include `benchmark/infrastructure/Benchmark.h` in you file. It includes all needed classes, interfaces and types. -Secondly, you should write your class inside the `ad_benchmark` namespace, where all benchmark infrastructure can be found. +To write your own class, first include `benchmark/infrastructure/Benchmark.h` in you file. It includes all needed +classes, interfaces and types. 
+Secondly, you should write your class inside the `ad_benchmark` namespace, where all benchmark infrastructure can be +found. Now, the interface for benchmark classes has 5 functions: @@ -33,15 +41,18 @@ Now, the interface for benchmark classes has 5 functions: - `getGeneralMetadata` - `runAllBenchmarks` - `getConfigManager` -- `updateDefaultGeneralMetadata` +- `updateDefaultGeneralMetadata` `name` should just return the name of your benchmark class, so that you can easily identify it later. -`getGeneralMetadata` and `getConfigManager`are getters for member variables, that are used for advanced features. So they can be safely ignored for the time being. +`getGeneralMetadata` and `getConfigManager`are getters for member variables, that are used for advanced features. So +they can be safely ignored for the time being. `updateDefaultGeneralMetadata` exists solely for the infrastructure and should be ignored. -`runAllBenchmarks` is where you actually measure your functions using the classes of `BenchmarkMeasurementContainer.h`, which should be created using `BenchmarkResults`, who will save them and later pass them on for processing by the infrastructure. +`runAllBenchmarks` is where you actually measure your functions using the classes of `BenchmarkMeasurementContainer.h`, +which should be created using `BenchmarkResults`, who will save them and later pass them on for processing by the +infrastructure. Which could look like this: ```c++ @@ -99,7 +110,8 @@ BenchmarkResults runAllBenchmarks(){ } ``` -After writing your class, you will have to register it. For that, simply call the macro `AD_REGISTER_BENCHMARK` with your class name and all needed arguments for construction inside the `ad_benchmark` namespace. +After writing your class, you will have to register it. For that, simply call the macro `AD_REGISTER_BENCHMARK` with +your class name and all needed arguments for construction inside the `ad_benchmark` namespace. 
For example: ```c++ @@ -109,24 +121,30 @@ AD_REGISTER_BENCHMARK(MyClass, ConstructorArgument1, ConstructorArgument2, ...); ## CMake Registering your finished benchmark class with CMake is rather easy. -Simply add the line `addAndLinkBenchmark(MyBenchmarkClassFile)`, without the ending `.cpp`, to the file `benchmark/CMakeLists.txt`. +Simply add the line `addAndLinkBenchmark(MyBenchmarkClassFile)`, without the ending `.cpp`, to the +file `benchmark/CMakeLists.txt`. It will now be compiled. The compiled version can be found inside the `benchmark` folder inside your build directory. # Using advanced benchmark features ## Metadata -Setting metadata is handled by the `BenchmarkMetadata` class. The set metadata information will be included in the printed output of a compiled benchmark file and in the JSON file export. +Setting metadata is handled by the `BenchmarkMetadata` class. The set metadata information will be included in the +printed output of a compiled benchmark file and in the JSON file export. You can find instances of `BenchmarkMetadata` for your usage at 4 locations: -- At `metadata()` of created `ResultEntry` objects, in order to give metadata information about the benchmark measurement. +- At `metadata()` of created `ResultEntry` objects, in order to give metadata information about the benchmark + measurement. - At `metadata()` of created `ResultGroup` objects, in order to give metadata information about the group. -- At `metadata()` of created `ResultTable` objects, in order to give metadata information about the table. +- At `metadata()` of created `Result` objects, in order to give metadata information about the table. -- In your own class, under the getter `getGeneralMetadata()`. The returned member variable exists in order to give more general metadata information about your benchmark class. This is mostly, so that you don't have to constantly repeat metadata information, that are true for all the things you are measuring, in other places. 
For example, this would be a good place to give the name of an algorithm, if your whole benchmark class is about measuring the runtimes of one. +- In your own class, under the getter `getGeneralMetadata()`. The returned member variable exists in order to give more + general metadata information about your benchmark class. This is mostly, so that you don't have to constantly repeat + metadata information, that are true for all the things you are measuring, in other places. For example, this would be + a good place to give the name of an algorithm, if your whole benchmark class is about measuring the runtimes of one. ## Runtime configuration @@ -135,7 +153,9 @@ Defining the configuration options and passing values to them. ### Adding options -Adding configuration options is done by adding configuration option to the private member variable `manager_`, accessible via a getter, by using the function `ConfigManager::addOption`. That is best done in the constructor of your class. +Adding configuration options is done by adding configuration option to the private member variable `manager_`, +accessible via a getter, by using the function `ConfigManager::addOption`. That is best done in the constructor of your +class. In our system a configuration option is described by a handful of characteristics: @@ -146,21 +166,23 @@ In our system a configuration option is described by a handful of characteristic 3. If it has a default value. If it hasn't, people will always have to provide their own value at run time. 4. What **type** of values it takes. The following types are available: - - - `bool`. - - `std::string`. - - `int`. - - `size_t` - - `float`. - - A `std::vector` of the previous options. -However, unlike the default value, the value it takes, isn't saved internally. + - `bool`. + - `std::string`. + - `int`. + - `size_t` + - `float`. + - A `std::vector` of the previous options. -Instead, it takes a pointer to a variable of the type, that it itself takes, at construction. 
Whenever `ConfigOption` gets set to a value, the variable, for which the pointer was passed, is set to that value. +However, unlike the default value, the value it takes, isn't saved internally. + +Instead, it takes a pointer to a variable of the type, that it itself takes, at construction. Whenever `ConfigOption` +gets set to a value, the variable, for which the pointer was passed, is set to that value. Note, that also happens at the time of creation, if a default value was given. -In order to organize `ConfigOption`s easier, `ConfigManager` uses JSON like paths, but made up entirely of strings, for identification. Those are defined at the time of creation of the option and can't be changed later. +In order to organize `ConfigOption`s easier, `ConfigManager` uses JSON like paths, but made up entirely of strings, for +identification. Those are defined at the time of creation of the option and can't be changed later. ### Passing values @@ -168,23 +190,36 @@ Setting the values of the configuration options at runtime can be done in two wa 1. Writing a JSON file and passing the file location via CLI. -2. Using the shorthand described in `src/util/ConfigManager/generated/ConfigShorthand.g4`, by writing it directly as an argument via CLI. Note: The shorthand will overwrite the value of any configuration option, if both ways try to set it. +2. Using the shorthand described in `src/util/ConfigManager/generated/ConfigShorthand.g4`, by writing it directly as an + argument via CLI. Note: The shorthand will overwrite the value of any configuration option, if both ways try to set + it. -The shorthand is basically just normal JSON, but adjusted for easier usage. There are 3 big changes. +The shorthand is basically just normal JSON, but adjusted for easier usage. There are 3 big changes. -First, there are no line breaks allowed. The shorthand is build for usage directly in the CLI, so that is an unneeded feature +First, there are no line breaks allowed. 
The shorthand is build for usage directly in the CLI, so that is an unneeded +feature -Second, because a configuration is always represented by a JSON object, a shorthand string is always treated, as if it had `{}` braces at the beginning and end. +Second, because a configuration is always represented by a JSON object, a shorthand string is always treated, as if it +had `{}` braces at the beginning and end. -Third, the keys of key-value pairs, for example `"key" : value`, don't need to be surrounded with `"`. `"` is a special symbol in the CLI, and we want to save you the extra work of always typing `\"key\"`. +Third, the keys of key-value pairs, for example `"key" : value`, don't need to be surrounded with `"`. `"` is a special +symbol in the CLI, and we want to save you the extra work of always typing `\"key\"`. -Using those two ways of passing information, the configuration options held by an internally created `ConfigManager` object, will be set. +Using those two ways of passing information, the configuration options held by an internally created `ConfigManager` +object, will be set. -In both of those, you have to write out the complete path to your configuration option and write the value, you wish to set it to, at the end. -For example: Let's say, you defined a configuration option `someNumber` and added it with the path `tableSizes/someNumber`. Then, if you wanted to set it to `20` using JSON, you would have to write: +In both of those, you have to write out the complete path to your configuration option and write the value, you wish to +set it to, at the end. +For example: Let's say, you defined a configuration option `someNumber` and added it with the +path `tableSizes/someNumber`. 
Then, if you wanted to set it to `20` using JSON, you would have to write: ```json -{"tableSizes": "some-number": 20} +{ + "tableSizes": "some-number" + : + 20 +} ``` -However, **if** the passed values can't be interpreted as the correct types for the configuration options, an exception will be thrown. +However, **if** the passed values can't be interpreted as the correct types for the configuration options, an exception +will be thrown. diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 4dac643245..e985e73305 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -81,10 +81,10 @@ std::vector Bind::getChildren() { } // _____________________________________________________________________________ -ResultTable Bind::computeResult() { +Result Bind::computeResult() { using std::endl; LOG(DEBUG) << "Get input to BIND operation..." << endl; - shared_ptr subRes = _subtree->getResult(); + shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Got input to Bind operation." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -114,7 +114,7 @@ ResultTable Bind::computeResult() { template void Bind::computeExpressionBind( IdTable* outputIdTable, LocalVocab* outputLocalVocab, - const ResultTable& inputResultTable, + const Result& inputResultTable, sparqlExpression::SparqlExpression* expression) const { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 82ffc85688..95406b25fc 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -46,13 +46,13 @@ class Bind : public Operation { [[nodiscard]] vector resultSortedOn() const override; private: - ResultTable computeResult() override; + Result computeResult() override; // Implementation for the binding of arbitrary expressions. 
template void computeExpressionBind( IdTable* outputIdTable, LocalVocab* outputLocalVocab, - const ResultTable& inputResultTable, + const Result& inputResultTable, sparqlExpression::SparqlExpression* expression) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 7ddfb8d5ca..6aba4674f8 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -2,7 +2,7 @@ add_subdirectory(sparqlExpressions) add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) qlever_target_link_libraries(SortPerformanceEstimator) add_library(engine - Engine.cpp QueryExecutionTree.cpp Operation.cpp ResultTable.cpp LocalVocab.cpp + Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp IndexScan.cpp Join.cpp Sort.cpp Distinct.cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 3c2052bb4d..c26af77e63 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -132,10 +132,10 @@ void CartesianProductJoin::writeResultColumn(std::span targetColumn, } } // ____________________________________________________________________________ -ResultTable CartesianProductJoin::computeResult() { +Result CartesianProductJoin::computeResult() { IdTable result{getExecutionContext()->getAllocator()}; result.setNumColumns(getResultWidth()); - std::vector> subResults; + std::vector> subResults; // We don't need to fully materialize the child results if we have a LIMIT // specified and an OFFSET of 0. 
@@ -210,7 +210,7 @@ ResultTable CartesianProductJoin::computeResult() { auto subResultsDeref = std::views::transform( subResults, [](auto& x) -> decltype(auto) { return *x; }); return {std::move(result), resultSortedOn(), - ResultTable::getMergedLocalVocab(subResultsDeref)}; + Result::getMergedLocalVocab(subResultsDeref)}; } // ____________________________________________________________________________ diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 3b94760c39..c00867ba05 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -79,7 +79,7 @@ class CartesianProductJoin : public Operation { private: //! Compute the result of the query-subtree rooted at this element.. - ResultTable computeResult() override; + Result computeResult() override; // Copy each element from the `inputColumn` `groupSize` times to the // `targetColumn`. Repeat until the `targetColumn` is copletely filled. Skip diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 026ff3e416..22b318f377 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -100,7 +100,7 @@ size_t CountAvailablePredicates::getCostEstimate() { } // _____________________________________________________________________________ -ResultTable CountAvailablePredicates::computeResult() { +Result CountAvailablePredicates::computeResult() { LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(2); @@ -137,7 +137,7 @@ ResultTable CountAvailablePredicates::computeResult() { patterns); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; } else { - std::shared_ptr subresult = subtree_->getResult(); + std::shared_ptr subresult = subtree_->getResult(); LOG(DEBUG) << "CountAvailablePredicates subresult computation done." 
<< std::endl; diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h index 600458f950..d1c152cf67 100644 --- a/src/engine/CountAvailablePredicates.h +++ b/src/engine/CountAvailablePredicates.h @@ -16,8 +16,8 @@ using std::string; using std::vector; -// This Operation takes a ResultTable with at least one column containing ids, -// and a column index referring to such a column. It then creates a ResultTable +// This Operation takes a Result with at least one column containing ids, +// and a column index referring to such a column. It then creates a Result // containing two columns, the first one filled with the ids of all predicates // for which there is an entry in the index with one of the entities in the // specified input column as its subject. The second output column contains a @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation { void computePatternTrickAllEntities( IdTable* result, const CompactVectorOfStrings& patterns) const; - ResultTable computeResult() override; + Result computeResult() override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index 8f3b1c0fb4..e7a7f6ccea 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -37,10 +37,10 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -ResultTable Distinct::computeResult() { +Result Distinct::computeResult() { IdTable idTable{getExecutionContext()->getAllocator()}; LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl; - shared_ptr subRes = _subtree->getResult(); + shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Distinct result computation..." 
<< endl; idTable.setNumColumns(subRes->idTable().numColumns()); diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 068ebaaa96..f3dde32014 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -55,7 +55,7 @@ class Distinct : public Operation { [[nodiscard]] string getCacheKeyImpl() const override; private: - virtual ResultTable computeResult() override; + virtual Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index caba3d0775..8b4bfa0d49 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -29,7 +29,7 @@ cppcoro::generator ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, + LimitOffsetClause limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle) { for (size_t i : getRowIndices(limitAndOffset, res->idTable())) { ConstructQueryExportContext context{i, *res, qet.getVariableColumns(), @@ -57,7 +57,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, - std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { resultTable->logResultSize(); auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( @@ -92,7 +92,7 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, + std::shared_ptr res, CancellationHandle cancellationHandle) { auto generator = constructQueryResultToTriples(qet, constructTriples, 
limitAndOffset, std::move(res), @@ -110,7 +110,7 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); const IdTable& data = resultTable->idTable(); @@ -268,7 +268,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + shared_ptr resultTable, CancellationHandle cancellationHandle) { using nlohmann::json; @@ -388,7 +388,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); LOG(DEBUG) << "Resolving strings for finished binary result...\n"; @@ -418,7 +418,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // This call triggers the possibly expensive computation of the query result // unless the result is already cached. - shared_ptr resultTable = qet.getResult(); + shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); LOG(DEBUG) << "Converting result IDs to their corresponding strings ..." << std::endl; @@ -563,7 +563,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectClause.getSelectedVariablesAsStrings(); // This call triggers the possibly expensive computation of the query result // unless the result is already cached. 
- shared_ptr resultTable = qet.getResult(); + shared_ptr resultTable = qet.getResult(); // In the XML format, the variables don't include the question mark. auto varsWithoutQuestionMark = std::views::transform( @@ -606,7 +606,7 @@ ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, - std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -638,7 +638,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, const ad_utility::Timer& requestTimer, uint64_t maxSend, CancellationHandle cancellationHandle) { - shared_ptr resultTable = qet.getResult(); + shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); auto timeResultComputation = requestTimer.msecs(); @@ -725,7 +725,7 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( AD_THROW( "SPARQL-compliant JSON format is only supported for SELECT queries"); } - shared_ptr resultTable = qet.getResult(); + shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); nlohmann::json j; auto limitAndOffset = query._limitOffset; diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index acb4c6a328..0bab7bca5a 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -111,7 +111,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + shared_ptr resultTable, CancellationHandle cancellationHandle); /** @@ -132,7 +132,7 @@ class ExportQueryExecutionTrees { static nlohmann::json 
idTableToQLeverJSONArray( const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle); // ___________________________________________________________________________ @@ -140,7 +140,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, + std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. @@ -148,7 +148,7 @@ class ExportQueryExecutionTrees { constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, + LimitOffsetClause limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle); // ___________________________________________________________________________ @@ -156,7 +156,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + shared_ptr resultTable, CancellationHandle cancellationHandle); // ___________________________________________________________________________ @@ -165,7 +165,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, - std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle); // _____________________________________________________________________________ diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index b6b96fc363..3ccdb288e3 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -43,9 +43,9 @@ string Filter::getDescriptor() const { } // 
_____________________________________________________________________________ -ResultTable Filter::computeResult() { +Result Filter::computeResult() { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; - shared_ptr subRes = _subtree->getResult(); + shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); @@ -63,7 +63,7 @@ ResultTable Filter::computeResult() { // _____________________________________________________________________________ template void Filter::computeFilterImpl(IdTable* outputIdTable, - const ResultTable& inputResultTable) { + const Result& inputResultTable) { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), inputResultTable.idTable(), getExecutionContext()->getAllocator(), diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 621f29342f..b10402414f 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -58,9 +58,9 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - ResultTable computeResult() override; + Result computeResult() override; template void computeFilterImpl(IdTable* outputIdTable, - const ResultTable& inputResultTable); + const Result& inputResultTable); }; diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 4bd05f36f2..3bb8ca1912 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -222,9 +222,9 @@ void GroupBy::processGroup( * @param blockEnd Where the group ends. * @param input The input Table. * @param result - * @param inTable The input ResultTable, which is required for its local + * @param inTable The input Result, which is required for its local * vocabulary - * @param outTable The output ResultTable, the vocabulary of which needs to be + * @param outTable The output Result, the vocabulary of which needs to be * expanded for GROUP_CONCAT aggregates * @param distinctHashSet An empty hash set. 
This is only passed in as an * argument to allow for efficient reusage of its @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, *dynResult = std::move(result).toDynamic(); } -ResultTable GroupBy::computeResult() { +Result GroupBy::computeResult() { LOG(DEBUG) << "GroupBy result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -335,7 +335,7 @@ ResultTable GroupBy::computeResult() { auto hashMapOptimizationParams = checkIfHashMapOptimizationPossible(aggregates); - std::shared_ptr subresult; + std::shared_ptr subresult; if (hashMapOptimizationParams.has_value()) { const auto* child = _subtree->getRootOperation()->getChildren().at(0); // Skip sorting diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index e53478c253..81e65cc8ff 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -89,7 +89,7 @@ class GroupBy : public Operation { private: VariableToColumnMap computeVariableToColumnMap() const override; - ResultTable computeResult() override; + Result computeResult() override; template void processGroup(const Aggregate& expression, diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 897c5e575d..2f15d79451 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -254,7 +254,7 @@ size_t HasPredicateScan::getCostEstimate() { } // ___________________________________________________________________________ -ResultTable HasPredicateScan::computeResult() { +Result HasPredicateScan::computeResult() { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); @@ -365,7 +365,7 @@ void HasPredicateScan::computeFullScan( // ___________________________________________________________________________ template -ResultTable HasPredicateScan::computeSubqueryS( +Result HasPredicateScan::computeSubqueryS( IdTable* dynResult, const CompactVectorOfStrings& patterns) { auto subresult = subtree().getResult(); auto 
patternCol = subtreeColIdx(); diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index 954579afa1..8ba4d1cc24 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -105,11 +105,11 @@ class HasPredicateScan : public Operation { size_t resultSize); template - ResultTable computeSubqueryS(IdTable* result, + Result computeSubqueryS(IdTable* result, const CompactVectorOfStrings& patterns); private: - ResultTable computeResult() override; + Result computeResult() override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 15ecb9fa83..9996b66eee 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -123,7 +123,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { return variableToColumnMap; } // _____________________________________________________________________________ -ResultTable IndexScan::computeResult() { +Result IndexScan::computeResult() { LOG(DEBUG) << "IndexScan result computation...\n"; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 87f688de17..e832aca3e3 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -104,7 +104,7 @@ class IndexScan : public Operation { std::array getPermutedTriple() const; private: - ResultTable computeResult() override; + Result computeResult() override; vector getChildren() override { return {}; } diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index cba6ab70fb..9bdffbf9e9 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -90,7 +90,7 @@ string Join::getCacheKeyImpl() const { string Join::getDescriptor() const { return "Join on " + _joinVar.name(); } // _____________________________________________________________________________ -ResultTable Join::computeResult() { +Result Join::computeResult() { LOG(DEBUG) << "Getting sub-results for 
join result computation..." << endl; size_t leftWidth = _left->getResultWidth(); size_t rightWidth = _right->getResultWidth(); @@ -153,7 +153,7 @@ ResultTable Join::computeResult() { } } - shared_ptr leftRes = + shared_ptr leftRes = leftResIfCached ? leftResIfCached : _left->getResult(); checkCancellation(); if (leftRes->size() == 0) { @@ -181,7 +181,7 @@ ResultTable Join::computeResult() { leftRes->getSharedLocalVocab()}; } - shared_ptr rightRes = + shared_ptr rightRes = rightResIfCached ? rightResIfCached : _right->getResult(); checkCancellation(); join(leftRes->idTable(), _leftJoinCol, rightRes->idTable(), _rightJoinCol, @@ -191,7 +191,7 @@ ResultTable Join::computeResult() { // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*leftRes, *rightRes)}; + Result::getMergedLocalVocab(*leftRes, *rightRes)}; } // _____________________________________________________________________________ diff --git a/src/engine/Join.h b/src/engine/Join.h index bd662f1b64..ab9c532f3d 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -115,7 +115,7 @@ class Join : public Operation { virtual string getCacheKeyImpl() const override; private: - ResultTable computeResult() override; + Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Minus.cpp b/src/engine/Minus.cpp index 8b5c12c9ad..e48f2fd4f5 100644 --- a/src/engine/Minus.cpp +++ b/src/engine/Minus.cpp @@ -32,7 +32,7 @@ string Minus::getCacheKeyImpl() const { string Minus::getDescriptor() const { return "Minus"; } // _____________________________________________________________________________ -ResultTable Minus::computeResult() { +Result Minus::computeResult() { LOG(DEBUG) << "Minus result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -56,7 +56,7 @@ ResultTable Minus::computeResult() { // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*leftResult, *rightResult)}; + Result::getMergedLocalVocab(*leftResult, *rightResult)}; } // _____________________________________________________________________________ diff --git a/src/engine/Minus.h b/src/engine/Minus.h index 7052935659..2b18350159 100644 --- a/src/engine/Minus.h +++ b/src/engine/Minus.h @@ -72,7 +72,7 @@ class Minus : public Operation { const IdTableView& a, const IdTableView& b, size_t ia, size_t ib, const vector>& matchedColumns); - ResultTable computeResult() override; + Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index 0d4de5c978..af2c05691e 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -59,7 +59,7 @@ string MultiColumnJoin::getDescriptor() const { } // _____________________________________________________________________________ -ResultTable MultiColumnJoin::computeResult() { +Result MultiColumnJoin::computeResult() { LOG(DEBUG) << "MultiColumnJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -86,7 +86,7 @@ ResultTable MultiColumnJoin::computeResult() { // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). 
return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*leftResult, *rightResult)}; + Result::getMergedLocalVocab(*leftResult, *rightResult)}; } // _____________________________________________________________________________ diff --git a/src/engine/MultiColumnJoin.h b/src/engine/MultiColumnJoin.h index 5b951d858e..57f7910452 100644 --- a/src/engine/MultiColumnJoin.h +++ b/src/engine/MultiColumnJoin.h @@ -63,7 +63,7 @@ class MultiColumnJoin : public Operation { IdTable* resultMightBeUnsorted); private: - ResultTable computeResult() override; + Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/NeutralElementOperation.h b/src/engine/NeutralElementOperation.h index 8729d9ab71..8de6f08beb 100644 --- a/src/engine/NeutralElementOperation.h +++ b/src/engine/NeutralElementOperation.h @@ -40,7 +40,7 @@ class NeutralElementOperation : public Operation { }; private: - ResultTable computeResult() override { + Result computeResult() override { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(0); idTable.resize(1); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 3516371ee7..70ac926b9b 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -70,8 +70,8 @@ void Operation::recursivelySetTimeConstraint( } // ________________________________________________________________________ -shared_ptr Operation::getResult(bool isRoot, - bool onlyReadFromCache) { +shared_ptr Operation::getResult(bool isRoot, + bool onlyReadFromCache) { ad_utility::Timer timer{ad_utility::Timer::Started}; if (isRoot) { @@ -124,7 +124,7 @@ shared_ptr Operation::getResult(bool isRoot, checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); - ResultTable result = computeResult(); + Result result = computeResult(); checkCancellation(); // Compute the datatypes that occur in each column of the result. 
@@ -221,7 +221,7 @@ std::chrono::milliseconds Operation::remainingTime() const { // _______________________________________________________________________ void Operation::updateRuntimeInformationOnSuccess( - const ResultTable& resultTable, ad_utility::CacheStatus cacheStatus, + const Result& resultTable, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) { _runtimeInfo->totalTime_ = duration; _runtimeInfo->numRows_ = resultTable.size(); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index c9c7d30627..316e5ba7b7 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -10,7 +10,7 @@ #include #include "engine/QueryExecutionContext.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/RuntimeInformation.h" #include "engine/VariableToColumnMap.h" #include "parser/data/LimitOffsetClause.h" @@ -146,8 +146,8 @@ class Operation { * @return A shared pointer to the result. May only be `nullptr` if * `onlyReadFromCache` is true. */ - shared_ptr getResult(bool isRoot = false, - bool onlyReadFromCache = false); + shared_ptr getResult(bool isRoot = false, + bool onlyReadFromCache = false); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). As soon as one child is aborted, the whole @@ -195,9 +195,7 @@ class Operation { // Direct access to the `computeResult()` method. This should be only used for // testing, otherwise the `getResult()` function should be used which also // sets the runtime info and uses the cache. - virtual ResultTable computeResultOnlyForTesting() final { - return computeResult(); - } + virtual Result computeResultOnlyForTesting() final { return computeResult(); } protected: // The QueryExecutionContext for this particular element. @@ -246,7 +244,7 @@ class Operation { private: //! Compute the result of the query-subtree rooted at this element.. 
- virtual ResultTable computeResult() = 0; + virtual Result computeResult() = 0; // Create and store the complete runtime information for this operation after // it has either been succesfully computed or read from the cache. @@ -260,7 +258,7 @@ class Operation { // allowed when `cacheStatus` is `cachedPinned` or `cachedNotPinned`, // otherwise a runtime check will fail. virtual void updateRuntimeInformationOnSuccess( - const ResultTable& resultTable, ad_utility::CacheStatus cacheStatus, + const Result& resultTable, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) final; diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index 6ec87dc456..70520e9669 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -88,7 +88,7 @@ string OptionalJoin::getDescriptor() const { } // _____________________________________________________________________________ -ResultTable OptionalJoin::computeResult() { +Result OptionalJoin::computeResult() { LOG(DEBUG) << "OptionalJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -115,7 +115,7 @@ ResultTable OptionalJoin::computeResult() { // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). 
return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*leftResult, *rightResult)}; + Result::getMergedLocalVocab(*leftResult, *rightResult)}; } // _____________________________________________________________________________ diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index ddc2ae1fd2..ace0168106 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -75,7 +75,7 @@ class OptionalJoin : public Operation { private: void computeSizeEstimateAndMultiplicities(); - ResultTable computeResult() override; + Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index 172bdf08fa..6dec565589 100644 --- a/src/engine/OrderBy.cpp +++ b/src/engine/OrderBy.cpp @@ -62,10 +62,10 @@ std::string OrderBy::getDescriptor() const { } // _____________________________________________________________________________ -ResultTable OrderBy::computeResult() { +Result OrderBy::computeResult() { using std::endl; LOG(DEBUG) << "Getting sub-result for OrderBy result computation..." 
<< endl; - shared_ptr subRes = subtree_->getResult(); + shared_ptr subRes = subtree_->getResult(); // TODO proper timeout for sorting operations auto sortEstimateCancellationFactor = diff --git a/src/engine/OrderBy.h b/src/engine/OrderBy.h index 2ccc62c432..c4bc96214a 100644 --- a/src/engine/OrderBy.h +++ b/src/engine/OrderBy.h @@ -78,7 +78,7 @@ class OrderBy : public Operation { } private: - ResultTable computeResult() override; + Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override { return subtree_->getVariableColumns(); diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index c264cc5d41..79a95619c2 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -13,7 +13,7 @@ #include "engine/Engine.h" #include "engine/QueryPlanningCostFactors.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" #include "global/Constants.h" @@ -31,18 +31,15 @@ using std::vector; class CacheValue { private: - std::shared_ptr _resultTable; + std::shared_ptr _resultTable; RuntimeInformation _runtimeInfo; public: - explicit CacheValue(ResultTable resultTable, RuntimeInformation runtimeInfo) - : _resultTable( - std::make_shared(std::move(resultTable))), + explicit CacheValue(Result resultTable, RuntimeInformation runtimeInfo) + : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} - const shared_ptr& resultTable() const { - return _resultTable; - } + const shared_ptr& resultTable() const { return _resultTable; } const RuntimeInformation& runtimeInfo() const { return _runtimeInfo; } diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 3dde14c1eb..3bade1c59e 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -51,7 +51,7 @@ class QueryExecutionTree { size_t getResultWidth() 
const { return rootOperation_->getResultWidth(); } - std::shared_ptr getResult() const { + std::shared_ptr getResult() const { return rootOperation_->getResult(isRoot()); } @@ -192,7 +192,7 @@ class QueryExecutionTree { bool isRoot_ = false; // used to distinguish the root from child // operations/subtrees when pinning only the result. - std::shared_ptr cachedResult_ = nullptr; + std::shared_ptr cachedResult_ = nullptr; public: // Helper class to avoid bug in g++ that leads to memory corruption when diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 980741fa8b..cb3c5deb92 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -136,7 +136,7 @@ class QueryPlanner { std::move(operation))} {} std::shared_ptr _qet; - std::shared_ptr _cachedResult; + std::shared_ptr _cachedResult; uint64_t _idsOfIncludedNodes = 0; uint64_t _idsOfIncludedFilters = 0; uint64_t idsOfIncludedTextLimits_ = 0; diff --git a/src/engine/ResultTable.cpp b/src/engine/Result.cpp similarity index 80% rename from src/engine/ResultTable.cpp rename to src/engine/Result.cpp index 73edbaa506..f12d69fa17 100644 --- a/src/engine/ResultTable.cpp +++ b/src/engine/Result.cpp @@ -4,13 +4,12 @@ // Johannes Kalmbach // Hannah Bast -#include "engine/ResultTable.h" - #include "engine/LocalVocab.h" +#include "engine/Result.h" #include "util/Exception.h" // _____________________________________________________________________________ -string ResultTable::asDebugString() const { +string Result::asDebugString() const { std::ostringstream os; os << "First (up to) 5 rows of result with size:\n"; for (size_t i = 0; i < std::min(5, idTable().size()); ++i) { @@ -23,21 +22,19 @@ string ResultTable::asDebugString() const { } // _____________________________________________________________________________ -auto ResultTable::getMergedLocalVocab(const ResultTable& resultTable1, - const ResultTable& resultTable2) +auto Result::getMergedLocalVocab(const Result& resultTable1, + const Result& 
resultTable2) -> SharedLocalVocabWrapper { return getMergedLocalVocab( std::array{std::cref(resultTable1), std::cref(resultTable2)}); } // _____________________________________________________________________________ -LocalVocab ResultTable::getCopyOfLocalVocab() const { - return localVocab().clone(); -} +LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } // _____________________________________________________________________________ -ResultTable::ResultTable(IdTable idTable, vector sortedBy, - SharedLocalVocabWrapper localVocab) +Result::Result(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab) : _idTable{std::move(idTable)}, _sortedBy{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { @@ -60,13 +57,13 @@ ResultTable::ResultTable(IdTable idTable, vector sortedBy, } // _____________________________________________________________________________ -ResultTable::ResultTable(IdTable idTable, vector sortedBy, - LocalVocab&& localVocab) - : ResultTable(std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}) {} +Result::Result(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab) + : Result(std::move(idTable), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}) {} // _____________________________________________________________________________ -void ResultTable::applyLimitOffset(const LimitOffsetClause& limitOffset) { +void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
@@ -85,7 +82,7 @@ void ResultTable::applyLimitOffset(const LimitOffsetClause& limitOffset) { } // _____________________________________________________________________________ -auto ResultTable::getOrComputeDatatypeCountsPerColumn() +auto Result::getOrComputeDatatypeCountsPerColumn() -> const DatatypeCountsPerColumn& { if (datatypeCountsPerColumn_.has_value()) { return datatypeCountsPerColumn_.value(); @@ -103,7 +100,7 @@ auto ResultTable::getOrComputeDatatypeCountsPerColumn() } // _____________________________________________________________ -bool ResultTable::checkDefinedness(const VariableToColumnMap& varColMap) { +bool Result::checkDefinedness(const VariableToColumnMap& varColMap) { const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn(); return std::ranges::all_of(varColMap, [&](const auto& varAndCol) { const auto& [columnIndex, mightContainUndef] = varAndCol.second; diff --git a/src/engine/ResultTable.h b/src/engine/Result.h similarity index 83% rename from src/engine/ResultTable.h rename to src/engine/Result.h index 605f9c64a2..979e5f0d96 100644 --- a/src/engine/ResultTable.h +++ b/src/engine/Result.h @@ -16,16 +16,10 @@ #include "parser/data/LimitOffsetClause.h" #include "util/Log.h" -using std::vector; - // The result of an `Operation`. This is the class QLever uses for all // intermediate or final results when processing a SPARQL query. The actual data // is always a table and contained in the member `idTable()`. -// -// TODO: I would find it more appropriate to simply call this class `Result`. -// Otherwise, it's not clear from the names what the difference between a -// `ResultTable` and an `IdTable` is. -class ResultTable { +class Result { private: // The actual entries. IdTable _idTable; @@ -45,20 +39,20 @@ class ResultTable { // This class is used to enforce the invariant, that the `localVocab_` (which // is stored in a shared_ptr) is only shared between instances of the - // `ResultTable` class (where it is `const`). 
This gives a provable guarantee + // `Result` class (where it is `const`). This gives a provable guarantee // that the `localVocab_` is not mutated through some other code that still // owns a pointer to the same local vocab. class SharedLocalVocabWrapper { private: - // Only the `ResultTable` class is allowed to read or write the stored + // Only the `Result` class is allowed to read or write the stored // `shared_ptr`. Other code can obtain a `SharedLocalVocabWrapper` from a - // `ResultTable` and pass this wrapper into another `ResultTable`, but it + // `Result` and pass this wrapper into another `Result`, but it // can never access the `shared_ptr` directly. std::shared_ptr localVocab_ = std::make_shared(); explicit SharedLocalVocabWrapper(LocalVocabPtr localVocab) : localVocab_{std::move(localVocab)} {} - friend class ResultTable; + friend class Result; public: // Create a wrapper from a `LocalVocab`. This is safe to call also from @@ -84,23 +78,23 @@ class ResultTable { // if expensive checks are enabled, for example by not defining the `NDEBUG` // macro. // The first overload of the constructor is for local vocabs that are shared - // with another `ResultTable` via the `getSharedLocalVocab...` methods below. + // with another `Result` via the `getSharedLocalVocab...` methods below. // The second overload is for newly created local vocabularies. - ResultTable(IdTable idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab); - ResultTable(IdTable idTable, std::vector sortedBy, - LocalVocab&& localVocab); + Result(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab); + Result(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab); // Prevent accidental copying of a result table. - ResultTable(const ResultTable& other) = delete; - ResultTable& operator=(const ResultTable& other) = delete; + Result(const Result& other) = delete; + Result& operator=(const Result& other) = delete; // Moving of a result table is OK. 
- ResultTable(ResultTable&& other) = default; - ResultTable& operator=(ResultTable&& other) = default; + Result(Result&& other) = default; + Result& operator=(Result&& other) = default; // Default destructor. - virtual ~ResultTable() = default; + virtual ~Result() = default; // Get the number of rows of this result. size_t size() const { return _idTable.size(); } @@ -136,15 +130,14 @@ class ResultTable { // Like `getSharedLocalVocabFrom`, but takes more than one result and merges // all the corresponding local vocabs. static SharedLocalVocabWrapper getMergedLocalVocab( - const ResultTable& resultTable1, const ResultTable& resultTable2); + const Result& resultTable1, const Result& resultTable2); - // Overload for more than two `ResultTables` + // Overload for more than two `Results` template - requires std::convertible_to, - const ResultTable&> + requires std::convertible_to, const Result&> static SharedLocalVocabWrapper getMergedLocalVocab(R&& subResults) { std::vector vocabs; - for (const ResultTable& table : subResults) { + for (const Result& table : subResults) { vocabs.push_back(std::to_address(table.localVocab_)); } return SharedLocalVocabWrapper{LocalVocab::merge(vocabs)}; diff --git a/src/engine/ResultType.h b/src/engine/ResultType.h index c7770821f6..f2f292cd3e 100644 --- a/src/engine/ResultType.h +++ b/src/engine/ResultType.h @@ -6,11 +6,11 @@ namespace qlever { -// Enumerate the types of entries we can have in a `ResultTable`. +// Enumerate the types of entries we can have in a `Result`. // // NOTE: This was used in an old version of the QLever code, but no longer is // (because reality is more complicated than "one type per column"). The class -// is still needed for the correctness of the code, see `ResultTable.h`. +// is still needed for the correctness of the code, see `Result.h`. // // TODO: Properly keep track of result types again. 
In particular, efficiency // should benefit in the common use case where all entries in a column have a diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index ea2fea0402..172e09dab5 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -83,7 +83,7 @@ size_t Service::getCostEstimate() { } // ____________________________________________________________________________ -ResultTable Service::computeResult() { +Result Service::computeResult() { // Get the URL of the SPARQL endpoint. std::string_view serviceIriString = parsedServiceClause_.serviceIri_.iri(); AD_CONTRACT_CHECK(serviceIriString.starts_with("<") && diff --git a/src/engine/Service.h b/src/engine/Service.h index c569d1f1cf..23922e8d78 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -80,7 +80,7 @@ class Service : public Operation { std::string getCacheKeyImpl() const override; // Compute the result using `getTsvFunction_`. - ResultTable computeResult() override; + Result computeResult() override; // Write the given TSV result to the given result object. The `I` is the width // of the result table. diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 3bb0207e35..33acd4015f 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -50,10 +50,10 @@ std::string Sort::getDescriptor() const { } // _____________________________________________________________________________ -ResultTable Sort::computeResult() { +Result Sort::computeResult() { using std::endl; LOG(DEBUG) << "Getting sub-result for Sort result computation..." 
<< endl; - shared_ptr subRes = subtree_->getResult(); + shared_ptr subRes = subtree_->getResult(); // TODO proper timeout for sorting operations auto sortEstimateCancellationFactor = diff --git a/src/engine/Sort.h b/src/engine/Sort.h index 4919133f9d..42bc44badc 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -67,7 +67,7 @@ class Sort : public Operation { } private: - virtual ResultTable computeResult() override; + virtual Result computeResult() override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index 352ecde5e1..b0a4d40346 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -14,7 +14,7 @@ TextIndexScanForEntity::TextIndexScanForEntity( word_(std::move(word)) {} // _____________________________________________________________________________ -ResultTable TextIndexScanForEntity::computeResult() { +Result TextIndexScanForEntity::computeResult() { IdTable idTable = getExecutionContext()->getIndex().getEntityMentionsForWord( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index 6926b1d8d7..899cef6af2 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -101,7 +101,7 @@ class TextIndexScanForEntity : public Operation { return std::get(varOrFixed_.entity_).second; } - ResultTable computeResult() override; + Result computeResult() override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index c490a88c6f..692a797caa 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -13,7 +13,7 @@ TextIndexScanForWord::TextIndexScanForWord(QueryExecutionContext* qec, isPrefix_(word_.ends_with('*')) {} // 
_____________________________________________________________________________ -ResultTable TextIndexScanForWord::computeResult() { +Result TextIndexScanForWord::computeResult() { IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index a8d7033195..095b466e53 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -48,9 +48,9 @@ class TextIndexScanForWord : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - // Returns a ResultTable containing an IdTable with the columns being + // Returns a Result containing an IdTable with the columns being // the text variable and the completed word (if it was prefixed) - ResultTable computeResult() override; + Result computeResult() override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 16fe987dbb..0f352802bb 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -140,9 +140,9 @@ class TransitivePathImpl : public TransitivePathBase { * on the time it takes to compute the hull. The set of nodes on the * start side should be as small as possible. 
* - * @return ResultTable The result of the TransitivePath operation + * @return Result The result of the TransitivePath operation */ - ResultTable computeResult() override { + Result computeResult() override { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && rhs_.isVariable()) { AD_THROW( @@ -150,7 +150,7 @@ class TransitivePathImpl : public TransitivePathBase { "not supported"); } auto [startSide, targetSide] = decideDirection(); - shared_ptr subRes = subtree_->getResult(); + shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; @@ -159,7 +159,7 @@ class TransitivePathImpl : public TransitivePathBase { size_t subWidth = subRes->idTable().numColumns(); if (startSide.isBoundVariable()) { - shared_ptr sideRes = + shared_ptr sideRes = startSide.treeAndCol_.value().first->getResult(); size_t sideWidth = sideRes->idTable().numColumns(); @@ -169,7 +169,7 @@ class TransitivePathImpl : public TransitivePathBase { sideRes->idTable()); return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*sideRes, *subRes)}; + Result::getMergedLocalVocab(*sideRes, *subRes)}; } CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), &TransitivePathImpl::computeTransitivePath, this, diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 82ed91aef3..02db123fbc 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -158,10 +158,10 @@ size_t Union::getCostEstimate() { getSizeEstimateBeforeLimit(); } -ResultTable Union::computeResult() { +Result Union::computeResult() { LOG(DEBUG) << "Union result computation..." << std::endl; - shared_ptr subRes1 = _subtrees[0]->getResult(); - shared_ptr subRes2 = _subtrees[1]->getResult(); + shared_ptr subRes1 = _subtrees[0]->getResult(); + shared_ptr subRes2 = _subtrees[1]->getResult(); LOG(DEBUG) << "Union subresult computation done." 
<< std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -173,8 +173,8 @@ ResultTable Union::computeResult() { LOG(DEBUG) << "Union result computation done" << std::endl; // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). - return ResultTable{std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*subRes1, *subRes2)}; + return Result{std::move(idTable), resultSortedOn(), + Result::getMergedLocalVocab(*subRes1, *subRes2)}; } void Union::computeUnion( diff --git a/src/engine/Union.h b/src/engine/Union.h index 4be8694e95..99ad5d9ac3 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,7 +61,7 @@ class Union : public Operation { } private: - virtual ResultTable computeResult() override; + virtual Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Values.cpp b/src/engine/Values.cpp index 3362317f55..2e40dc3c60 100644 --- a/src/engine/Values.cpp +++ b/src/engine/Values.cpp @@ -108,7 +108,7 @@ void Values::computeMultiplicities() { } // ____________________________________________________________________________ -ResultTable Values::computeResult() { +Result Values::computeResult() { // Set basic properties of the result table. IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/Values.h b/src/engine/Values.h index b69c2392ea..7d291362b3 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -48,7 +48,7 @@ class Values : public Operation { public: // These two are also used by class `Service`, hence public. 
- virtual ResultTable computeResult() override; + virtual Result computeResult() override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 15588685c3..2e7a074bd4 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -9,7 +9,7 @@ #include #include "engine/ExportQueryExecutionTrees.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/sparqlExpressions/SparqlExpressionTypes.h" #include "global/Id.h" #include "util/ConstexprSmallString.h" diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 1d563f21a6..57962104e2 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -12,7 +12,7 @@ #include #include -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/idTable/CompressedExternalIdTable.h" #include "global/Pattern.h" #include "global/SpecialIds.h" diff --git a/src/parser/data/ConstructQueryExportContext.h b/src/parser/data/ConstructQueryExportContext.h index 13b78826e5..253e8614bb 100644 --- a/src/parser/data/ConstructQueryExportContext.h +++ b/src/parser/data/ConstructQueryExportContext.h @@ -6,7 +6,7 @@ #include -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/VariableToColumnMap.h" #include "parser/data/Variable.h" #include "util/HashMap.h" @@ -18,7 +18,7 @@ enum struct PositionInTriple : int { SUBJECT, PREDICATE, OBJECT }; // All the data that is needed to evaluate an element in a construct query. 
struct ConstructQueryExportContext { const size_t _row; - const ResultTable& _res; + const Result& _res; const VariableToColumnMap& _variableColumns; const Index& _qecIndex; }; diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index bf0f5e5e82..8b7a3207e7 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -29,7 +29,7 @@ Variable::Variable(std::string name) : _name{std::move(name)} { // Call stack. Most notably the check which columns belongs to this variable // should be much further up in the call stack. size_t row = context._row; - const ResultTable& res = context._res; + const Result& res = context._res; const auto& variableColumns = context._variableColumns; const Index& qecIndex = context._qecIndex; const auto& idTable = res.idTable(); diff --git a/test/BenchmarkMeasurementContainerTest.cpp b/test/BenchmarkMeasurementContainerTest.cpp index cb1c71bde2..a500c3df2c 100644 --- a/test/BenchmarkMeasurementContainerTest.cpp +++ b/test/BenchmarkMeasurementContainerTest.cpp @@ -88,7 +88,7 @@ TEST(BenchmarkMeasurementContainerTest, ResultGroup) { } /* -Check the content of a `ResultTable` row. +Check the content of a `Result` row. */ static void checkResultTableRow(const ResultTable& table, const size_t& rowNumber, @@ -157,7 +157,7 @@ TEST(BenchmarkMeasurementContainerTest, ResultTable) { Special case: A table with no columns. Should throw an exception on creation, because you can't add columns after creation and a table without columns is quite the stupid idea. Additionally, operations on such an empty - table can create segmentation faults. The string conversion of `ResultTable` + table can create segmentation faults. The string conversion of `Result` uses `std::ranges::max`, which really doesn't play well with empty vectors. 
*/ ASSERT_ANY_THROW(ResultTable("1 by 0 table", {"Test"}, {})); @@ -308,7 +308,7 @@ TEST(BenchmarkMeasurementContainerTest, ResultTableEraseRow) { TEST(BenchmarkMeasurementContainerTest, ResultGroupDeleteMember) { /* - Add the given number of dummy `ResultEntry`s and dummy `ResultTable`s to the + Add the given number of dummy `ResultEntry`s and dummy `Result`s to the given group. */ auto addDummyMembers = [](ResultGroup* group, const size_t numOfEntries) { diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 3729ef8f56..8906fef14e 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -193,7 +193,7 @@ TEST_F(GroupByTest, doGroupBy) { {ParsedQuery::AggregateType::AVG, 3, 22, nullptr}, {ParsedQuery::AggregateType::AVG, 4, 23, nullptr}}; - ResultTable outTable{allocator()}; + Result outTable{allocator()}; // This is normally done when calling computeResult in the GroupBy // operation. diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index 429aa8fb8b..def4541b1c 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -21,7 +21,7 @@ #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" #include "engine/QueryExecutionTree.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/Sort.h" #include "engine/TransitivePathBase.h" #include "engine/Union.h" @@ -189,7 +189,7 @@ TEST(LocalVocab, propagation) { }; std::ranges::transform(expectedWordsAsStrings, std::back_inserter(expectedWords), toLitOrIri); - std::shared_ptr resultTable = operation.getResult(); + std::shared_ptr resultTable = operation.getResult(); ASSERT_TRUE(resultTable) << "Operation: " << operation.getDescriptor() << std::endl; TestWords localVocabWords = diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index d29457c3d9..dc27f25a8b 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -163,7 +163,7 @@ TEST_F(ServiceTest, computeResult) { getTsvFunctionFactory( expectedUrl, expectedSparqlQuery, 
"?x\t?y\n\t\n\t\n\t\n\t\n")}; - std::shared_ptr result = serviceOperation4.getResult(); + std::shared_ptr result = serviceOperation4.getResult(); // Check that `` and `` were contained in the original vocabulary and // that ``, ``, `` were added to the (initially empty) local diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index d6f24ca6f0..e4cf4279fe 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -16,7 +16,7 @@ using enum PositionInTriple; namespace { struct ContextWrapper { Index _index{ad_utility::makeUnlimitedAllocator()}; - ResultTable _resultTable{ + Result _resultTable{ IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}; // TODO `VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -27,7 +27,7 @@ struct ContextWrapper { void setIdTable(IdTable&& table) { _resultTable = - ResultTable{std::move(table), {}, _resultTable.getSharedLocalVocab()}; + Result{std::move(table), {}, _resultTable.getSharedLocalVocab()}; } }; diff --git a/test/ValuesTest.cpp b/test/ValuesTest.cpp index 21b5691cf9..901f06d332 100644 --- a/test/ValuesTest.cpp +++ b/test/ValuesTest.cpp @@ -9,7 +9,7 @@ #include "./util/IdTableHelpers.h" #include "./util/IdTestHelpers.h" #include "./util/TripleComponentTestHelpers.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/Values.h" #include "engine/idTable/IdTable.h" #include "util/IndexTestHelpers.h" diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 25ff7f3aaf..ddfa1aa5df 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -9,7 +9,7 @@ namespace textIndexScanTestHelpers { // obtain the textRecord using idToOptionalString. 
// TODO: Implement a more elegant/stable version inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, - const ResultTable& result, + const Result& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( @@ -18,7 +18,7 @@ inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, } inline string getEntityFromResultTable(const QueryExecutionContext* qec, - const ResultTable& result, + const Result& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( @@ -27,7 +27,7 @@ inline string getEntityFromResultTable(const QueryExecutionContext* qec, } inline string getWordFromResultTable(const QueryExecutionContext* qec, - const ResultTable& result, + const Result& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 2c91fc1edc..07f2478a4b 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -6,7 +6,7 @@ #include "engine/Operation.h" #include "engine/QueryExecutionContext.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "util/Algorithm.h" #include "util/Random.h" @@ -49,7 +49,7 @@ class ValuesForTesting : public Operation { size_t& costEstimate() { return costEstimate_; } // ___________________________________________________________________________ - ResultTable computeResult() override { + Result computeResult() override { auto table = table_.clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 8764aab4b7..088ecff65f 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -31,7 +31,7 @@ class StallForeverOperation : public Operation { using Operation::Operation; // Do-nothing operation that runs for 100ms without computing anything, but // which can be cancelled. 
- ResultTable computeResult() override { + Result computeResult() override { auto end = std::chrono::steady_clock::now() + 100ms; while (std::chrono::steady_clock::now() < end) { checkCancellation(); @@ -73,7 +73,7 @@ class ShallowParentOperation : public Operation { return {child_.get()}; } - ResultTable computeResult() override { + Result computeResult() override { auto childResult = child_->getResult(); return {childResult->idTable().clone(), resultSortedOn(), childResult->getSharedLocalVocab()}; From 31b2c11c4ce644347912c0a4cd718336d7cf1fc3 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 20 Apr 2024 00:31:11 +0200 Subject: [PATCH 002/133] Wrap idTable in variant --- src/engine/Result.cpp | 25 ++++++++++++++----------- src/engine/Result.h | 12 ++++++------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index f12d69fa17..dee18aff28 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -4,8 +4,9 @@ // Johannes Kalmbach // Hannah Bast -#include "engine/LocalVocab.h" #include "engine/Result.h" + +#include "engine/LocalVocab.h" #include "util/Exception.h" // _____________________________________________________________________________ @@ -67,18 +68,19 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
+ auto& idTable = std::get<0>(_idTable); std::ranges::for_each( - _idTable.getColumns(), - [offset = limitOffset.actualOffset(_idTable.numRows()), + idTable.getColumns(), + [offset = limitOffset.actualOffset(idTable.numRows()), upperBound = - limitOffset.upperBound(_idTable.numRows())](std::span column) { + limitOffset.upperBound(idTable.numRows())](std::span column) { std::shift_left(column.begin(), column.begin() + upperBound, offset); }); // Resize the `IdTable` if necessary. - size_t targetSize = limitOffset.actualSize(_idTable.numRows()); - AD_CORRECTNESS_CHECK(targetSize <= _idTable.numRows()); - _idTable.resize(targetSize); - _idTable.shrinkToFit(); + size_t targetSize = limitOffset.actualSize(idTable.numRows()); + AD_CORRECTNESS_CHECK(targetSize <= idTable.numRows()); + idTable.resize(targetSize); + idTable.shrinkToFit(); } // _____________________________________________________________________________ @@ -87,10 +89,11 @@ auto Result::getOrComputeDatatypeCountsPerColumn() if (datatypeCountsPerColumn_.has_value()) { return datatypeCountsPerColumn_.value(); } + auto& idTable = std::get<0>(_idTable); auto& types = datatypeCountsPerColumn_.emplace(); - types.resize(_idTable.numColumns()); - for (size_t i = 0; i < _idTable.numColumns(); ++i) { - const auto& col = _idTable.getColumn(i); + types.resize(idTable.numColumns()); + for (size_t i = 0; i < idTable.numColumns(); ++i) { + const auto& col = idTable.getColumn(i); auto& datatypes = types.at(i); for (Id id : col) { ++datatypes[static_cast(id.getDatatype())]; diff --git a/src/engine/Result.h b/src/engine/Result.h index 979e5f0d96..336e83adbf 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include "engine/LocalVocab.h" @@ -22,7 +23,7 @@ class Result { private: // The actual entries. - IdTable _idTable; + std::variant _idTable; // The column indices by which the result is sorted (primary sort key first). 
// Empty if the result is not sorted on any column. @@ -48,8 +49,7 @@ class Result { // `shared_ptr`. Other code can obtain a `SharedLocalVocabWrapper` from a // `Result` and pass this wrapper into another `Result`, but it // can never access the `shared_ptr` directly. - std::shared_ptr localVocab_ = - std::make_shared(); + std::shared_ptr localVocab_; explicit SharedLocalVocabWrapper(LocalVocabPtr localVocab) : localVocab_{std::move(localVocab)} {} friend class Result; @@ -97,13 +97,13 @@ class Result { virtual ~Result() = default; // Get the number of rows of this result. - size_t size() const { return _idTable.size(); } + size_t size() const { return std::get<0>(_idTable).size(); } // Get the number of columns of this result. - size_t width() const { return _idTable.numColumns(); } + size_t width() const { return std::get<0>(_idTable).numColumns(); } // Const access to the underlying `IdTable`. - const IdTable& idTable() const { return _idTable; } + const IdTable& idTable() const { return std::get<0>(_idTable); } // Const access to the columns by which the `idTable()` is sorted. 
const std::vector& sortedBy() const { return _sortedBy; } From 4d0204c2ea3e0e8865534886997afaf4fa633854 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 20 Apr 2024 01:23:12 +0200 Subject: [PATCH 003/133] Add ability to create `Result` from generator --- src/engine/Bind.cpp | 2 +- src/engine/Bind.h | 2 +- src/engine/CartesianProductJoin.cpp | 11 ++++---- src/engine/CartesianProductJoin.h | 2 +- src/engine/CountAvailablePredicates.cpp | 2 +- src/engine/CountAvailablePredicates.h | 2 +- src/engine/Distinct.cpp | 2 +- src/engine/Distinct.h | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 8 +++--- src/engine/Filter.cpp | 2 +- src/engine/Filter.h | 2 +- src/engine/GroupBy.cpp | 2 +- src/engine/GroupBy.h | 2 +- src/engine/HasPredicateScan.cpp | 2 +- src/engine/HasPredicateScan.h | 2 +- src/engine/IndexScan.cpp | 2 +- src/engine/IndexScan.h | 2 +- src/engine/Join.cpp | 4 +-- src/engine/Join.h | 2 +- src/engine/Minus.cpp | 7 ++--- src/engine/Minus.h | 2 +- src/engine/MultiColumnJoin.cpp | 5 ++-- src/engine/MultiColumnJoin.h | 2 +- src/engine/NeutralElementOperation.h | 2 +- src/engine/Operation.cpp | 15 ++++++----- src/engine/Operation.h | 10 ++++--- src/engine/OptionalJoin.cpp | 5 ++-- src/engine/OptionalJoin.h | 2 +- src/engine/OrderBy.cpp | 4 +-- src/engine/OrderBy.h | 2 +- src/engine/QueryExecutionContext.h | 5 ++-- src/engine/QueryExecutionTree.cpp | 4 +-- src/engine/QueryExecutionTree.h | 4 +-- src/engine/Result.cpp | 31 ++++++++++++++++++++-- src/engine/Result.h | 25 +++++++++-------- src/engine/Service.cpp | 2 +- src/engine/Service.h | 2 +- src/engine/Sort.cpp | 4 +-- src/engine/Sort.h | 2 +- src/engine/TextIndexScanForEntity.cpp | 2 +- src/engine/TextIndexScanForEntity.h | 2 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/engine/TextIndexScanForWord.h | 2 +- src/engine/TransitivePathImpl.h | 2 +- src/engine/Union.cpp | 2 +- src/engine/Union.h | 2 +- src/engine/Values.cpp | 2 +- src/engine/Values.h | 2 +- 
test/OperationTest.cpp | 2 +- test/engine/TextIndexScanForEntityTest.cpp | 12 ++++----- test/engine/TextIndexScanForWordTest.cpp | 12 ++++----- test/engine/ValuesForTesting.h | 2 +- test/util/OperationTestHelpers.h | 4 +-- 53 files changed, 138 insertions(+), 102 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index e985e73305..813a206352 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -81,7 +81,7 @@ std::vector Bind::getChildren() { } // _____________________________________________________________________________ -Result Bind::computeResult() { +Result Bind::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Get input to BIND operation..." << endl; shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 95406b25fc..8d2128e900 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -46,7 +46,7 @@ class Bind : public Operation { [[nodiscard]] vector resultSortedOn() const override; private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; // Implementation for the binding of arbitrary expressions. 
template diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index c26af77e63..53f6ccde8d 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -132,7 +132,7 @@ void CartesianProductJoin::writeResultColumn(std::span targetColumn, } } // ____________________________________________________________________________ -Result CartesianProductJoin::computeResult() { +Result CartesianProductJoin::computeResult([[maybe_unused]] bool requestLazyness) { IdTable result{getExecutionContext()->getAllocator()}; result.setNumColumns(getResultWidth()); std::vector> subResults; @@ -154,7 +154,7 @@ Result CartesianProductJoin::computeResult() { } subResults.push_back(child.getResult()); // Early stopping: If one of the results is empty, we can stop early. - if (subResults.back()->size() == 0) { + if (subResults.back()->idTable().size() == 0) { break; } // Example for the following calculation: If we have a LIMIT of 1000 and @@ -162,13 +162,14 @@ Result CartesianProductJoin::computeResult() { // needs to evaluate only its first 10 results. The +1 is because integer // divisions are rounded down by default. if (limitIfPresent.has_value()) { - limitIfPresent.value()._limit = - limitIfPresent.value()._limit.value() / subResults.back()->size() + 1; + limitIfPresent.value()._limit = limitIfPresent.value()._limit.value() / + subResults.back()->idTable().size() + + 1; } } auto sizesView = std::views::transform( - subResults, [](const auto& child) { return child->size(); }); + subResults, [](const auto& child) { return child->idTable().size(); }); auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(), 1UL, std::multiplies{}); diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index c00867ba05..64fbf839d0 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -79,7 +79,7 @@ class CartesianProductJoin : public Operation { private: //! 
Compute the result of the query-subtree rooted at this element.. - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; // Copy each element from the `inputColumn` `groupSize` times to the // `targetColumn`. Repeat until the `targetColumn` is copletely filled. Skip diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 22b318f377..4a024e2740 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -100,7 +100,7 @@ size_t CountAvailablePredicates::getCostEstimate() { } // _____________________________________________________________________________ -Result CountAvailablePredicates::computeResult() { +Result CountAvailablePredicates::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(2); diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h index d1c152cf67..6664aae161 100644 --- a/src/engine/CountAvailablePredicates.h +++ b/src/engine/CountAvailablePredicates.h @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation { void computePatternTrickAllEntities( IdTable* result, const CompactVectorOfStrings& patterns) const; - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index e7a7f6ccea..013787087c 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -37,7 +37,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -Result Distinct::computeResult() { +Result Distinct::computeResult([[maybe_unused]] bool 
requestLazyness) { IdTable idTable{getExecutionContext()->getAllocator()}; LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl; shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index f3dde32014..9f9960efb9 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -55,7 +55,7 @@ class Distinct : public Operation { [[nodiscard]] string getCacheKeyImpl() const override; private: - virtual Result computeResult() override; + virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 8b4bfa0d49..c91914c1c1 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -91,8 +91,7 @@ nlohmann::json ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, + const LimitOffsetClause& limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle) { auto generator = constructQueryResultToTriples(qet, constructTriples, limitAndOffset, std::move(res), @@ -605,8 +604,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, - std::shared_ptr resultTable, + LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -642,7 +640,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( resultTable->logResultSize(); auto 
timeResultComputation = requestTimer.msecs(); - size_t resultSize = resultTable->size(); + size_t resultSize = resultTable->idTable().size(); nlohmann::json j; diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 3ccdb288e3..dcdc997f6f 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -43,7 +43,7 @@ string Filter::getDescriptor() const { } // _____________________________________________________________________________ -Result Filter::computeResult() { +Result Filter::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Filter result computation..." << endl; diff --git a/src/engine/Filter.h b/src/engine/Filter.h index b10402414f..8b44f31592 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -58,7 +58,7 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; template void computeFilterImpl(IdTable* outputIdTable, diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 3bb8ca1912..88f6fe23af 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, *dynResult = std::move(result).toDynamic(); } -Result GroupBy::computeResult() { +Result GroupBy::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "GroupBy result computation..." 
<< std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index 81e65cc8ff..ee433317ff 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -89,7 +89,7 @@ class GroupBy : public Operation { private: VariableToColumnMap computeVariableToColumnMap() const override; - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; template void processGroup(const Aggregate& expression, diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 2f15d79451..6f674e5bd1 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -254,7 +254,7 @@ size_t HasPredicateScan::getCostEstimate() { } // ___________________________________________________________________________ -Result HasPredicateScan::computeResult() { +Result HasPredicateScan::computeResult([[maybe_unused]] bool requestLazyness) { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index 8ba4d1cc24..52ad881354 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -109,7 +109,7 @@ class HasPredicateScan : public Operation { const CompactVectorOfStrings& patterns); private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 9996b66eee..5b73e8f6b1 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -123,7 +123,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { return variableToColumnMap; } // _____________________________________________________________________________ -Result IndexScan::computeResult() { +Result 
IndexScan::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "IndexScan result computation...\n"; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index e832aca3e3..398c8430a2 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -104,7 +104,7 @@ class IndexScan : public Operation { std::array getPermutedTriple() const; private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; vector getChildren() override { return {}; } diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 9bdffbf9e9..78ccc6c8fc 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -90,7 +90,7 @@ string Join::getCacheKeyImpl() const { string Join::getDescriptor() const { return "Join on " + _joinVar.name(); } // _____________________________________________________________________________ -Result Join::computeResult() { +Result Join::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Getting sub-results for join result computation..." << endl; size_t leftWidth = _left->getResultWidth(); size_t rightWidth = _right->getResultWidth(); @@ -156,7 +156,7 @@ Result Join::computeResult() { shared_ptr leftRes = leftResIfCached ? 
leftResIfCached : _left->getResult(); checkCancellation(); - if (leftRes->size() == 0) { + if (leftRes->idTable().size() == 0) { _right->getRootOperation()->updateRuntimeInformationWhenOptimizedOut(); // TODO When we add triples to the // index, the vocabularies of index scans will not necessarily be empty and diff --git a/src/engine/Join.h b/src/engine/Join.h index ab9c532f3d..aaf3f7e63b 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -115,7 +115,7 @@ class Join : public Operation { virtual string getCacheKeyImpl() const override; private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Minus.cpp b/src/engine/Minus.cpp index e48f2fd4f5..6a5cd8af23 100644 --- a/src/engine/Minus.cpp +++ b/src/engine/Minus.cpp @@ -32,7 +32,7 @@ string Minus::getCacheKeyImpl() const { string Minus::getDescriptor() const { return "Minus"; } // _____________________________________________________________________________ -Result Minus::computeResult() { +Result Minus::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Minus result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -43,8 +43,9 @@ Result Minus::computeResult() { LOG(DEBUG) << "Minus subresult computation done" << std::endl; - LOG(DEBUG) << "Computing minus of results of size " << leftResult->size() - << " and " << rightResult->size() << endl; + LOG(DEBUG) << "Computing minus of results of size " + << leftResult->idTable().size() << " and " + << rightResult->idTable().size() << endl; int leftWidth = leftResult->idTable().numColumns(); int rightWidth = rightResult->idTable().numColumns(); diff --git a/src/engine/Minus.h b/src/engine/Minus.h index 2b18350159..378642bc6f 100644 --- a/src/engine/Minus.h +++ b/src/engine/Minus.h @@ -72,7 +72,7 @@ class Minus : public Operation { const IdTableView& a, const IdTableView& b, size_t ia, size_t ib, const vector>& matchedColumns); - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index af2c05691e..f766df9f05 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -59,7 +59,7 @@ string MultiColumnJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result MultiColumnJoin::computeResult() { +Result MultiColumnJoin::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "MultiColumnJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -75,7 +75,8 @@ Result MultiColumnJoin::computeResult() { LOG(DEBUG) << "MultiColumnJoin subresult computation done." 
<< std::endl; LOG(DEBUG) << "Computing a multi column join between results of size " - << leftResult->size() << " and " << rightResult->size() << endl; + << leftResult->idTable().size() << " and " + << rightResult->idTable().size() << endl; computeMultiColumnJoin(leftResult->idTable(), rightResult->idTable(), _joinColumns, &idTable); diff --git a/src/engine/MultiColumnJoin.h b/src/engine/MultiColumnJoin.h index 57f7910452..d60079c60f 100644 --- a/src/engine/MultiColumnJoin.h +++ b/src/engine/MultiColumnJoin.h @@ -63,7 +63,7 @@ class MultiColumnJoin : public Operation { IdTable* resultMightBeUnsorted); private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/NeutralElementOperation.h b/src/engine/NeutralElementOperation.h index 8de6f08beb..e4ee67a1d9 100644 --- a/src/engine/NeutralElementOperation.h +++ b/src/engine/NeutralElementOperation.h @@ -40,7 +40,7 @@ class NeutralElementOperation : public Operation { }; private: - Result computeResult() override { + Result computeResult([[maybe_unused]] bool requestLazyness) override { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(0); idTable.resize(1); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 70ac926b9b..75a2e7a2cd 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -71,7 +71,8 @@ void Operation::recursivelySetTimeConstraint( // ________________________________________________________________________ shared_ptr Operation::getResult(bool isRoot, - bool onlyReadFromCache) { + bool onlyReadFromCache, + bool requestLazyness) { ad_utility::Timer timer{ad_utility::Timer::Started}; if (isRoot) { @@ -120,11 +121,12 @@ shared_ptr Operation::getResult(bool isRoot, updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto computeLambda = [this, &timer] { + auto computeLambda = [this, &timer, 
requestLazyness] { checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); - Result result = computeResult(); + Result result = computeResult(requestLazyness); + AD_CONTRACT_CHECK(requestLazyness || result.isDataEvaluated()); checkCancellation(); // Compute the datatypes that occur in each column of the result. @@ -170,8 +172,9 @@ shared_ptr Operation::getResult(bool isRoot, } updateRuntimeInformationOnSuccess(result, timer.msecs()); - auto resultNumRows = result._resultPointer->resultTable()->size(); - auto resultNumCols = result._resultPointer->resultTable()->width(); + auto resultNumRows = result._resultPointer->resultTable()->idTable().size(); + auto resultNumCols = + result._resultPointer->resultTable()->idTable().numColumns(); LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " << resultNumCols << std::endl; return result._resultPointer->resultTable(); @@ -224,7 +227,7 @@ void Operation::updateRuntimeInformationOnSuccess( const Result& resultTable, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) { _runtimeInfo->totalTime_ = duration; - _runtimeInfo->numRows_ = resultTable.size(); + _runtimeInfo->numRows_ = resultTable.idTable().size(); _runtimeInfo->cacheStatus_ = cacheStatus; _runtimeInfo->status_ = RuntimeInformation::Status::fullyMaterialized; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 316e5ba7b7..22d8ae008e 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -147,7 +147,8 @@ class Operation { * `onlyReadFromCache` is true. */ shared_ptr getResult(bool isRoot = false, - bool onlyReadFromCache = false); + bool onlyReadFromCache = false, + bool requestLazyness = false); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). As soon as one child is aborted, the whole @@ -195,7 +196,10 @@ class Operation { // Direct access to the `computeResult()` method. 
This should be only used for // testing, otherwise the `getResult()` function should be used which also // sets the runtime info and uses the cache. - virtual Result computeResultOnlyForTesting() final { return computeResult(); } + virtual Result computeResultOnlyForTesting( + bool requestLazyness = false) final { + return computeResult(requestLazyness); + } protected: // The QueryExecutionContext for this particular element. @@ -244,7 +248,7 @@ class Operation { private: //! Compute the result of the query-subtree rooted at this element.. - virtual Result computeResult() = 0; + virtual Result computeResult(bool requestLazyness) = 0; // Create and store the complete runtime information for this operation after // it has either been succesfully computed or read from the cache. diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index 70520e9669..97ec965920 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -88,7 +88,7 @@ string OptionalJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result OptionalJoin::computeResult() { +Result OptionalJoin::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "OptionalJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -104,7 +104,8 @@ Result OptionalJoin::computeResult() { LOG(DEBUG) << "OptionalJoin subresult computation done." 
<< std::endl; LOG(DEBUG) << "Computing optional join between results of size " - << leftResult->size() << " and " << rightResult->size() << endl; + << leftResult->idTable().size() << " and " + << rightResult->idTable().size() << endl; optionalJoin(leftResult->idTable(), rightResult->idTable(), _joinColumns, &idTable, implementation_); diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index ace0168106..63d3511fe0 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -75,7 +75,7 @@ class OptionalJoin : public Operation { private: void computeSizeEstimateAndMultiplicities(); - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index 6dec565589..8c36a195c7 100644 --- a/src/engine/OrderBy.cpp +++ b/src/engine/OrderBy.cpp @@ -62,7 +62,7 @@ std::string OrderBy::getDescriptor() const { } // _____________________________________________________________________________ -Result OrderBy::computeResult() { +Result OrderBy::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Getting sub-result for OrderBy result computation..." 
<< endl; shared_ptr subRes = subtree_->getResult(); @@ -71,7 +71,7 @@ Result OrderBy::computeResult() { auto sortEstimateCancellationFactor = RuntimeParameters().get<"sort-estimate-cancellation-factor">(); if (getExecutionContext()->getSortPerformanceEstimator().estimatedSortTime( - subRes->size(), subRes->width()) > + subRes->idTable().size(), subRes->idTable().numColumns()) > remainingTime() * sortEstimateCancellationFactor) { // The estimated time for this sort is much larger than the actually // remaining time, cancel this operation diff --git a/src/engine/OrderBy.h b/src/engine/OrderBy.h index c4bc96214a..91b15220ca 100644 --- a/src/engine/OrderBy.h +++ b/src/engine/OrderBy.h @@ -78,7 +78,7 @@ class OrderBy : public Operation { } private: - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override { return subtree_->getVariableColumns(); diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 79a95619c2..cff8925f46 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -47,8 +47,9 @@ class CacheValue { struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { if (const auto& tablePtr = cacheValue._resultTable; tablePtr) { - return ad_utility::MemorySize::bytes(tablePtr->size() * - tablePtr->width() * sizeof(Id)); + return ad_utility::MemorySize::bytes(tablePtr->idTable().size() * + tablePtr->idTable().numColumns() * + sizeof(Id)); } else { return 0_B; } diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index beab5b742b..c458f60c90 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -84,7 +84,7 @@ size_t QueryExecutionTree::getCostEstimate() { size_t QueryExecutionTree::getSizeEstimate() { if (!sizeEstimate_.has_value()) { if (cachedResult_) { - sizeEstimate_ = 
cachedResult_->size(); + sizeEstimate_ = cachedResult_->idTable().size(); } else { // if we are in a unit test setting and there is no QueryExecutionContest // specified it is the rootOperation_'s obligation to handle this case @@ -98,7 +98,7 @@ size_t QueryExecutionTree::getSizeEstimate() { // _____________________________________________________________________________ bool QueryExecutionTree::knownEmptyResult() { if (cachedResult_) { - return cachedResult_->size() == 0; + return cachedResult_->idTable().size() == 0; } return rootOperation_->knownEmptyResult(); } diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 3bade1c59e..abec47266e 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -51,8 +51,8 @@ class QueryExecutionTree { size_t getResultWidth() const { return rootOperation_->getResultWidth(); } - std::shared_ptr getResult() const { - return rootOperation_->getResult(isRoot()); + std::shared_ptr getResult(bool requestLazyness = false) const { + return rootOperation_->getResult(isRoot(), false, requestLazyness); } // A variable, its column index in the Id space result, and the `ResultType` diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index dee18aff28..b74b291630 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -34,7 +34,7 @@ auto Result::getMergedLocalVocab(const Result& resultTable1, LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } // _____________________________________________________________________________ -Result::Result(IdTable idTable, std::vector sortedBy, +Result::Result(TableType idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab) : _idTable{std::move(idTable)}, _sortedBy{std::move(sortedBy)}, @@ -58,7 +58,7 @@ Result::Result(IdTable idTable, std::vector sortedBy, } // _____________________________________________________________________________ -Result::Result(IdTable idTable, std::vector sortedBy, 
+Result::Result(TableType idTable, std::vector sortedBy, LocalVocab&& localVocab) : Result(std::move(idTable), std::move(sortedBy), SharedLocalVocabWrapper{std::move(localVocab)}) {} @@ -113,3 +113,30 @@ bool Result::checkDefinedness(const VariableToColumnMap& varColMap) { !hasUndefined; }); } + +// _____________________________________________________________________________ +const IdTable& Result::idTable() const { + AD_CONTRACT_CHECK(isDataEvaluated()); + return std::get(_idTable); +} + +// _____________________________________________________________________________ +cppcoro::generator& Result::idTables() { + AD_CONTRACT_CHECK(!isDataEvaluated()); + return std::get>(_idTable); +} + +// _____________________________________________________________________________ +bool Result::isDataEvaluated() const { + return std::holds_alternative(_idTable); +} + +// _____________________________________________________________________________ +void Result::logResultSize() const { + if (isDataEvaluated()) { + LOG(INFO) << "Result has size " << idTable().size() << " x " + << idTable().numColumns() << std::endl; + } else { + LOG(INFO) << "Result has unknown size (not computed yet)" << std::endl; + } +} diff --git a/src/engine/Result.h b/src/engine/Result.h index 336e83adbf..8927b29767 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,6 +15,7 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" +#include "util/Generator.h" #include "util/Log.h" // The result of an `Operation`. This is the class QLever uses for all @@ -23,7 +24,8 @@ class Result { private: // The actual entries. - std::variant _idTable; + using TableType = std::variant>; + TableType _idTable; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. 
@@ -80,9 +82,9 @@ class Result { // The first overload of the constructor is for local vocabs that are shared // with another `Result` via the `getSharedLocalVocab...` methods below. // The second overload is for newly created local vocabularies. - Result(IdTable idTable, std::vector sortedBy, + Result(TableType idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab); - Result(IdTable idTable, std::vector sortedBy, + Result(TableType idTable, std::vector sortedBy, LocalVocab&& localVocab); // Prevent accidental copying of a result table. @@ -96,14 +98,11 @@ class Result { // Default destructor. virtual ~Result() = default; - // Get the number of rows of this result. - size_t size() const { return std::get<0>(_idTable).size(); } - - // Get the number of columns of this result. - size_t width() const { return std::get<0>(_idTable).numColumns(); } - // Const access to the underlying `IdTable`. - const IdTable& idTable() const { return std::get<0>(_idTable); } + const IdTable& idTable() const; + + // Access to the underlying `IdTable`. + cppcoro::generator& idTables(); // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { return _sortedBy; } @@ -148,13 +147,13 @@ class Result { // (which is not possible with `shareLocalVocabFrom`). LocalVocab getCopyOfLocalVocab() const; + bool isDataEvaluated() const; + // Log the size of this result. We call this at several places in // `Server::processQuery`. Ideally, this should only be called in one // place, but for now, this method at least makes sure that these log // messages look all the same. - void logResultSize() const { - LOG(INFO) << "Result has size " << size() << " x " << width() << std::endl; - } + void logResultSize() const; // The first rows of the result and its total size (for debugging). 
string asDebugString() const; diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 172e09dab5..053c8c6ff9 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -83,7 +83,7 @@ size_t Service::getCostEstimate() { } // ____________________________________________________________________________ -Result Service::computeResult() { +Result Service::computeResult([[maybe_unused]] bool requestLazyness) { // Get the URL of the SPARQL endpoint. std::string_view serviceIriString = parsedServiceClause_.serviceIri_.iri(); AD_CONTRACT_CHECK(serviceIriString.starts_with("<") && diff --git a/src/engine/Service.h b/src/engine/Service.h index 23922e8d78..09f89ae896 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -80,7 +80,7 @@ class Service : public Operation { std::string getCacheKeyImpl() const override; // Compute the result using `getTsvFunction_`. - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; // Write the given TSV result to the given result object. The `I` is the width // of the result table. diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 33acd4015f..06bdf2949c 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -50,7 +50,7 @@ std::string Sort::getDescriptor() const { } // _____________________________________________________________________________ -Result Sort::computeResult() { +Result Sort::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Getting sub-result for Sort result computation..." 
<< endl; shared_ptr subRes = subtree_->getResult(); @@ -59,7 +59,7 @@ Result Sort::computeResult() { auto sortEstimateCancellationFactor = RuntimeParameters().get<"sort-estimate-cancellation-factor">(); if (getExecutionContext()->getSortPerformanceEstimator().estimatedSortTime( - subRes->size(), subRes->width()) > + subRes->idTable().size(), subRes->idTable().numColumns()) > remainingTime() * sortEstimateCancellationFactor) { // The estimated time for this sort is much larger than the actually // remaining time, cancel this operation diff --git a/src/engine/Sort.h b/src/engine/Sort.h index 42bc44badc..48ebef179f 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -67,7 +67,7 @@ class Sort : public Operation { } private: - virtual Result computeResult() override; + virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index b0a4d40346..f693d78afe 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -14,7 +14,7 @@ TextIndexScanForEntity::TextIndexScanForEntity( word_(std::move(word)) {} // _____________________________________________________________________________ -Result TextIndexScanForEntity::computeResult() { +Result TextIndexScanForEntity::computeResult([[maybe_unused]] bool requestLazyness) { IdTable idTable = getExecutionContext()->getIndex().getEntityMentionsForWord( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index 899cef6af2..adf1acbcb8 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -101,7 +101,7 @@ class TextIndexScanForEntity : public Operation { return std::get(varOrFixed_.entity_).second; } - Result computeResult() override; + Result computeResult([[maybe_unused]] bool 
requestLazyness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 692a797caa..5f6e770634 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -13,7 +13,7 @@ TextIndexScanForWord::TextIndexScanForWord(QueryExecutionContext* qec, isPrefix_(word_.ends_with('*')) {} // _____________________________________________________________________________ -Result TextIndexScanForWord::computeResult() { +Result TextIndexScanForWord::computeResult([[maybe_unused]] bool requestLazyness) { IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index 095b466e53..c4c1d9126c 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -50,7 +50,7 @@ class TextIndexScanForWord : public Operation { private: // Returns a Result containing an IdTable with the columns being // the text variable and the completed word (if it was prefixed) - Result computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 0f352802bb..8dbb3ec9dd 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -142,7 +142,7 @@ class TransitivePathImpl : public TransitivePathBase { * * @return Result The result of the TransitivePath operation */ - Result computeResult() override { + Result computeResult([[maybe_unused]] bool requestLazyness) override { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && rhs_.isVariable()) { AD_THROW( diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 02db123fbc..a6225c4abf 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -158,7 +158,7 @@ 
size_t Union::getCostEstimate() { getSizeEstimateBeforeLimit(); } -Result Union::computeResult() { +Result Union::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Union result computation..." << std::endl; shared_ptr subRes1 = _subtrees[0]->getResult(); shared_ptr subRes2 = _subtrees[1]->getResult(); diff --git a/src/engine/Union.h b/src/engine/Union.h index 99ad5d9ac3..38c3c8f114 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,7 +61,7 @@ class Union : public Operation { } private: - virtual Result computeResult() override; + virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Values.cpp b/src/engine/Values.cpp index 2e40dc3c60..37ce45e96e 100644 --- a/src/engine/Values.cpp +++ b/src/engine/Values.cpp @@ -108,7 +108,7 @@ void Values::computeMultiplicities() { } // ____________________________________________________________________________ -Result Values::computeResult() { +Result Values::computeResult([[maybe_unused]] bool requestLazyness) { // Set basic properties of the result table. IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/Values.h b/src/engine/Values.h index 7d291362b3..ad51647b36 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -48,7 +48,7 @@ class Values : public Operation { public: // These two are also used by class `Service`, hence public. 
- virtual Result computeResult() override; + virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index b2bb425001..8ad6a67d37 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -152,7 +152,7 @@ TEST(OperationTest, verifyExceptionIsThrownOnCancellation) { std::this_thread::sleep_for(5ms); handle->cancel(CancellationState::TIMEOUT); }}; - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(operation.computeResult(), + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(operation.computeResult(false), ::testing::HasSubstr("timed out"), ad_utility::CancellationException); } diff --git a/test/engine/TextIndexScanForEntityTest.cpp b/test/engine/TextIndexScanForEntityTest.cpp index c88bc64053..e7d593da48 100644 --- a/test/engine/TextIndexScanForEntityTest.cpp +++ b/test/engine/TextIndexScanForEntityTest.cpp @@ -32,8 +32,8 @@ TEST(TextIndexScanForEntity, EntityScanBasic) { ASSERT_EQ(s1.getResultWidth(), 3); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.width(), 3); - ASSERT_EQ(result.size(), 3); + ASSERT_EQ(result.idTable().numColumns(), 3); + ASSERT_EQ(result.idTable().size(), 3); // NOTE: because of the way the graph above is constructed, the entities are // texts @@ -61,8 +61,8 @@ TEST(TextIndexScanForEntity, FixedEntityScan) { auto result = s3.computeResultOnlyForTesting(); ASSERT_EQ(s3.getResultWidth(), 2); - ASSERT_EQ(result.width(), 2); - ASSERT_EQ(result.size(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().size(), 1); using enum ColumnIndexAndTypeInfo::UndefStatus; VariableToColumnMap expectedVariables = { @@ -78,8 +78,8 @@ TEST(TextIndexScanForEntity, FixedEntityScan) { fixedEntity = "\"he failed the test\""; TextIndexScanForEntity s4{qec, Variable{"?text4"}, fixedEntity, "test*"}; result = s4.computeResultOnlyForTesting(); - ASSERT_EQ(result.width(), 2); - 
ASSERT_EQ(result.size(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().size(), 1); ASSERT_EQ(fixedEntity, h::getTextRecordFromResultTable(qec, result, 0)); } diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 60806fc73a..57f4b46e02 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -32,8 +32,8 @@ TEST(TextIndexScanForWord, WordScanPrefix) { ASSERT_EQ(s1.getResultWidth(), 2); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.width(), 2); - ASSERT_EQ(result.size(), 3); + ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().size(), 3); s2.getExternallyVisibleVariableColumns(); using enum ColumnIndexAndTypeInfo::UndefStatus; @@ -63,8 +63,8 @@ TEST(TextIndexScanForWord, WordScanBasic) { ASSERT_EQ(s1.getResultWidth(), 1); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.width(), 1); - ASSERT_EQ(result.size(), 2); + ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().size(), 2); ASSERT_EQ("\"he failed the test\"", h::getTextRecordFromResultTable(qec, result, 0)); @@ -76,8 +76,8 @@ TEST(TextIndexScanForWord, WordScanBasic) { ASSERT_EQ(s2.getResultWidth(), 1); result = s2.computeResultOnlyForTesting(); - ASSERT_EQ(result.width(), 1); - ASSERT_EQ(result.size(), 1); + ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().size(), 1); ASSERT_EQ("\"testing can help\"", h::getTextRecordFromResultTable(qec, result, 0)); diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 07f2478a4b..03f12477ed 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -49,7 +49,7 @@ class ValuesForTesting : public Operation { size_t& costEstimate() { return costEstimate_; } // ___________________________________________________________________________ - Result computeResult() override { + Result 
computeResult([[maybe_unused]] bool requestLazyness) override { auto table = table_.clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 088ecff65f..ca500083cc 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -31,7 +31,7 @@ class StallForeverOperation : public Operation { using Operation::Operation; // Do-nothing operation that runs for 100ms without computing anything, but // which can be cancelled. - Result computeResult() override { + Result computeResult([[maybe_unused]] bool requestLazyness) override { auto end = std::chrono::steady_clock::now() + 100ms; while (std::chrono::steady_clock::now() < end) { checkCancellation(); @@ -73,7 +73,7 @@ class ShallowParentOperation : public Operation { return {child_.get()}; } - Result computeResult() override { + Result computeResult([[maybe_unused]] bool requestLazyness) override { auto childResult = child_->getResult(); return {childResult->idTable().clone(), resultSortedOn(), childResult->getSharedLocalVocab()}; From 515ed0c935758012d31bebf5febfdfb11f8d1be4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 22 Apr 2024 02:11:00 +0200 Subject: [PATCH 004/133] Start fixing caching issues --- src/engine/Bind.cpp | 2 +- src/engine/Distinct.cpp | 3 ++- src/engine/ExportQueryExecutionTrees.cpp | 12 +++++------ src/engine/ExportQueryExecutionTrees.h | 9 +++----- src/engine/Filter.cpp | 2 +- src/engine/Join.cpp | 4 ++-- src/engine/MultiColumnJoin.cpp | 1 + src/engine/Operation.cpp | 20 ++++++++++-------- src/engine/Operation.h | 6 +++--- src/engine/OptionalJoin.cpp | 1 + src/engine/OrderBy.cpp | 3 ++- src/engine/QueryExecutionContext.h | 18 ++++++---------- src/engine/QueryExecutionTree.cpp | 3 ++- src/engine/Result.cpp | 6 ++++++ src/engine/Sort.cpp | 3 ++- src/engine/TransitivePathImpl.h | 4 ++-- src/engine/Union.cpp 
| 8 ++++---- src/index/CompressedRelation.cpp | 10 ++++----- src/util/ConcurrentCache.h | 26 +++++++++++++++--------- 19 files changed, 77 insertions(+), 64 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 813a206352..67650c22c5 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -84,7 +84,7 @@ std::vector Bind::getChildren() { Result Bind::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Get input to BIND operation..." << endl; - shared_ptr subRes = _subtree->getResult(); + std::shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Got input to Bind operation." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index 013787087c..25fe723ca7 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -7,6 +7,7 @@ #include #include "engine/CallFixedSize.h" +#include "engine/Engine.h" #include "engine/QueryExecutionTree.h" using std::endl; @@ -40,7 +41,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { Result Distinct::computeResult([[maybe_unused]] bool requestLazyness) { IdTable idTable{getExecutionContext()->getAllocator()}; LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl; - shared_ptr subRes = _subtree->getResult(); + std::shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Distinct result computation..." 
<< endl; idTable.setNumColumns(subRes->idTable().numColumns()); diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index c91914c1c1..2f3da341a7 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -267,7 +267,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { using nlohmann::json; @@ -387,7 +387,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); LOG(DEBUG) << "Resolving strings for finished binary result...\n"; @@ -417,7 +417,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // This call triggers the possibly expensive computation of the query result // unless the result is already cached. - shared_ptr resultTable = qet.getResult(); + std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); LOG(DEBUG) << "Converting result IDs to their corresponding strings ..." << std::endl; @@ -562,7 +562,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectClause.getSelectedVariablesAsStrings(); // This call triggers the possibly expensive computation of the query result // unless the result is already cached. - shared_ptr resultTable = qet.getResult(); + std::shared_ptr resultTable = qet.getResult(); // In the XML format, the variables don't include the question mark. 
auto varsWithoutQuestionMark = std::views::transform( @@ -636,7 +636,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, const ad_utility::Timer& requestTimer, uint64_t maxSend, CancellationHandle cancellationHandle) { - shared_ptr resultTable = qet.getResult(); + std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); auto timeResultComputation = requestTimer.msecs(); @@ -723,7 +723,7 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( AD_THROW( "SPARQL-compliant JSON format is only supported for SELECT queries"); } - shared_ptr resultTable = qet.getResult(); + std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); nlohmann::json j; auto limitAndOffset = query._limitOffset; diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 0bab7bca5a..e3a22afead 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -2,8 +2,6 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach -#include - #include "engine/QueryExecutionTree.h" #include "parser/data/LimitOffsetClause.h" #include "util/CancellationHandle.h" @@ -111,7 +109,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle); /** @@ -140,8 +138,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, - CancellationHandle cancellationHandle); + std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. 
static cppcoro::generator @@ -156,7 +153,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle); // ___________________________________________________________________________ diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index dcdc997f6f..2198530d32 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -45,7 +45,7 @@ string Filter::getDescriptor() const { // _____________________________________________________________________________ Result Filter::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; - shared_ptr subRes = _subtree->getResult(); + std::shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 78ccc6c8fc..3082ea7a06 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -153,7 +153,7 @@ Result Join::computeResult([[maybe_unused]] bool requestLazyness) { } } - shared_ptr leftRes = + std::shared_ptr leftRes = leftResIfCached ? leftResIfCached : _left->getResult(); checkCancellation(); if (leftRes->idTable().size() == 0) { @@ -181,7 +181,7 @@ Result Join::computeResult([[maybe_unused]] bool requestLazyness) { leftRes->getSharedLocalVocab()}; } - shared_ptr rightRes = + std::shared_ptr rightRes = rightResIfCached ? 
rightResIfCached : _right->getResult(); checkCancellation(); join(leftRes->idTable(), _leftJoinCol, rightRes->idTable(), _rightJoinCol, diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index f766df9f05..999e55b0c8 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -6,6 +6,7 @@ #include "engine/AddCombinedRowToTable.h" #include "engine/CallFixedSize.h" +#include "engine/Engine.h" #include "util/JoinAlgorithms/JoinAlgorithms.h" using std::endl; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 75a2e7a2cd..8fa66b9db4 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -70,9 +70,10 @@ void Operation::recursivelySetTimeConstraint( } // ________________________________________________________________________ -shared_ptr Operation::getResult(bool isRoot, - bool onlyReadFromCache, - bool requestLazyness) { +std::shared_ptr Operation::getResult(bool isRoot, + bool onlyReadFromCache, + bool requestLazyness) { + AD_CONTRACT_CHECK(!onlyReadFromCache || !requestLazyness); ad_utility::Timer timer{ad_utility::Timer::Started}; if (isRoot) { @@ -172,11 +173,14 @@ shared_ptr Operation::getResult(bool isRoot, } updateRuntimeInformationOnSuccess(result, timer.msecs()); - auto resultNumRows = result._resultPointer->resultTable()->idTable().size(); - auto resultNumCols = - result._resultPointer->resultTable()->idTable().numColumns(); - LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " - << resultNumCols << std::endl; + if (result._resultPointer->resultTable()->isDataEvaluated()) { + auto resultNumRows = + result._resultPointer->resultTable()->idTable().size(); + auto resultNumCols = + result._resultPointer->resultTable()->idTable().numColumns(); + LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " + << resultNumCols << std::endl; + } return result._resultPointer->resultTable(); } catch (ad_utility::CancellationException& e) { 
e.setOperation(getDescriptor()); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 22d8ae008e..7e30237acd 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -146,9 +146,9 @@ class Operation { * @return A shared pointer to the result. May only be `nullptr` if * `onlyReadFromCache` is true. */ - shared_ptr getResult(bool isRoot = false, - bool onlyReadFromCache = false, - bool requestLazyness = false); + std::shared_ptr getResult(bool isRoot = false, + bool onlyReadFromCache = false, + bool requestLazyness = false); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). As soon as one child is aborted, the whole diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index 97ec965920..80d6e11973 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -7,6 +7,7 @@ #include "engine/AddCombinedRowToTable.h" #include "engine/CallFixedSize.h" +#include "engine/Engine.h" #include "util/JoinAlgorithms/JoinAlgorithms.h" using std::endl; diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index 8c36a195c7..984a92b9e4 100644 --- a/src/engine/OrderBy.cpp +++ b/src/engine/OrderBy.cpp @@ -8,6 +8,7 @@ #include #include "engine/CallFixedSize.h" +#include "engine/Engine.h" #include "engine/QueryExecutionTree.h" #include "global/RuntimeParameters.h" #include "global/ValueIdComparators.h" @@ -65,7 +66,7 @@ std::string OrderBy::getDescriptor() const { Result OrderBy::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Getting sub-result for OrderBy result computation..." 
<< endl; - shared_ptr subRes = subtree_->getResult(); + std::shared_ptr subRes = subtree_->getResult(); // TODO proper timeout for sorting operations auto sortEstimateCancellationFactor = diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index cff8925f46..77a67b41ba 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -9,25 +9,16 @@ #include #include #include -#include -#include "engine/Engine.h" #include "engine/QueryPlanningCostFactors.h" #include "engine/Result.h" #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" -#include "global/Constants.h" #include "global/Id.h" #include "index/Index.h" #include "util/Cache.h" #include "util/ConcurrentCache.h" -#include "util/Log.h" #include "util/Synchronized.h" -#include "util/http/websocket/QueryId.h" - -using std::shared_ptr; -using std::string; -using std::vector; class CacheValue { private: @@ -39,12 +30,15 @@ class CacheValue { : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} - const shared_ptr& resultTable() const { return _resultTable; } + const std::shared_ptr& resultTable() const { + return _resultTable; + } const RuntimeInformation& runtimeInfo() const { return _runtimeInfo; } // Calculates the `MemorySize` taken up by an instance of `CacheValue`. struct SizeGetter { + // TODO Ensure this is only called for fully materialized results ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { if (const auto& tablePtr = cacheValue._resultTable; tablePtr) { return ad_utility::MemorySize::bytes(tablePtr->idTable().size() * @@ -61,7 +55,7 @@ class CacheValue { // checks on insertion, if the result is currently being computed // by another query. 
using ConcurrentLruCache = ad_utility::ConcurrentCache< - ad_utility::LRUCache>; + ad_utility::LRUCache>; using PinnedSizes = ad_utility::Synchronized, std::shared_mutex>; @@ -123,7 +117,7 @@ class QueryExecutionContext { return _sortPerformanceEstimator; } - [[nodiscard]] double getCostFactor(const string& key) const { + [[nodiscard]] double getCostFactor(const std::string& key) const { return _costFactors.getCostFactor(key); }; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index c458f60c90..35aa938614 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -158,7 +158,8 @@ std::vector> QueryExecutionTree::getJoinColumns( } // ____________________________________________________________________________ -std::pair, shared_ptr> +std::pair, + std::shared_ptr> QueryExecutionTree::createSortedTrees( std::shared_ptr qetA, std::shared_ptr qetB, diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index b74b291630..738a453058 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -68,6 +68,8 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
+ // TODO handle generator case properly + AD_CONTRACT_CHECK(isDataEvaluated()); auto& idTable = std::get<0>(_idTable); std::ranges::for_each( idTable.getColumns(), @@ -89,6 +91,8 @@ auto Result::getOrComputeDatatypeCountsPerColumn() if (datatypeCountsPerColumn_.has_value()) { return datatypeCountsPerColumn_.value(); } + // TODO handle generator case properly + AD_CONTRACT_CHECK(isDataEvaluated()); auto& idTable = std::get<0>(_idTable); auto& types = datatypeCountsPerColumn_.emplace(); types.resize(idTable.numColumns()); @@ -122,6 +126,8 @@ const IdTable& Result::idTable() const { // _____________________________________________________________________________ cppcoro::generator& Result::idTables() { + // TODO Find out if scenarios exist where it makes + // sense to return a generator with a single element here. AD_CONTRACT_CHECK(!isDataEvaluated()); return std::get>(_idTable); } diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 06bdf2949c..71012820d7 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -8,6 +8,7 @@ #include #include "engine/CallFixedSize.h" +#include "engine/Engine.h" #include "engine/QueryExecutionTree.h" #include "global/RuntimeParameters.h" @@ -53,7 +54,7 @@ std::string Sort::getDescriptor() const { Result Sort::computeResult([[maybe_unused]] bool requestLazyness) { using std::endl; LOG(DEBUG) << "Getting sub-result for Sort result computation..." 
<< endl; - shared_ptr subRes = subtree_->getResult(); + std::shared_ptr subRes = subtree_->getResult(); // TODO proper timeout for sorting operations auto sortEstimateCancellationFactor = diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 8dbb3ec9dd..38c24710ed 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -150,7 +150,7 @@ class TransitivePathImpl : public TransitivePathBase { "not supported"); } auto [startSide, targetSide] = decideDirection(); - shared_ptr subRes = subtree_->getResult(); + std::shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; @@ -159,7 +159,7 @@ class TransitivePathImpl : public TransitivePathBase { size_t subWidth = subRes->idTable().numColumns(); if (startSide.isBoundVariable()) { - shared_ptr sideRes = + std::shared_ptr sideRes = startSide.treeAndCol_.value().first->getResult(); size_t sideWidth = sideRes->idTable().numColumns(); diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index a6225c4abf..9571d3b060 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -75,7 +75,7 @@ VariableToColumnMap Union::computeVariableToColumnMap() const { // subtrees and if it is guaranteed to be bound in all the subtrees. auto mightContainUndef = [this](const Variable& var) { return std::ranges::any_of( - _subtrees, [&](const shared_ptr& subtree) { + _subtrees, [&](const std::shared_ptr& subtree) { const auto& varCols = subtree->getVariableColumns(); return !varCols.contains(var) || (varCols.at(var).mightContainUndef_ == @@ -160,8 +160,8 @@ size_t Union::getCostEstimate() { Result Union::computeResult([[maybe_unused]] bool requestLazyness) { LOG(DEBUG) << "Union result computation..." 
<< std::endl; - shared_ptr subRes1 = _subtrees[0]->getResult(); - shared_ptr subRes2 = _subtrees[1]->getResult(); + std::shared_ptr subRes1 = _subtrees[0]->getResult(); + std::shared_ptr subRes2 = _subtrees[1]->getResult(); LOG(DEBUG) << "Union subresult computation done." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; @@ -174,7 +174,7 @@ Result Union::computeResult([[maybe_unused]] bool requestLazyness) { // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). return Result{std::move(idTable), resultSortedOn(), - Result::getMergedLocalVocab(*subRes1, *subRes2)}; + Result::getMergedLocalVocab(*subRes1, *subRes2)}; } void Union::computeUnion( diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 498cf741f4..06d50f90c9 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -436,11 +436,11 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_; auto sharedResultFromCache = blockCache_ - .computeOnce(cacheKey, - [&]() { - return readAndDecompressBlock(blockMetadata, - allColumns); - }) + .computeOnce(cacheKey, + [&]() { + return readAndDecompressBlock(blockMetadata, + allColumns); + }) ._resultPointer; const DecompressedBlock& block = *sharedResultFromCache; diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index e76e2980b7..684334abb6 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -170,8 +170,6 @@ class ConcurrentCache { /** * @brief Obtain the result of an expensive computation. Do not recompute the * result if it is cached or currently being computed by another thread. - * @tparam ComputeFunction A callable whose operator() takes no argument and - * produces the computation result. * @param key A key that can uniquely identify a computation. 
For equal keys, * the associated computeFunctions must yield the same results. * @param computeFunction The actual computation. If the result has to be @@ -182,19 +180,18 @@ class ConcurrentCache { * @return A shared_ptr to the computation result. * */ - template + template ResultAndCacheStatus computeOnce(const Key& key, - ComputeFunction computeFunction, + std::invocable auto computeFunction, bool onlyReadFromCache = false) { - return computeOnceImpl(false, key, std::move(computeFunction), - onlyReadFromCache); + return computeOnceImpl( + false, key, std::move(computeFunction), onlyReadFromCache); } /// Similar to computeOnce, with the following addition: After the call /// completes, the result will be pinned in the underlying cache. - template ResultAndCacheStatus computeOncePinned(const Key& key, - ComputeFunction computeFunction, + std::invocable auto computeFunction, bool onlyReadFromCache = false) { return computeOnceImpl(true, key, std::move(computeFunction), onlyReadFromCache); @@ -311,9 +308,10 @@ class ConcurrentCache { private: // implementation for computeOnce (pinned and normal variant). - template + // TODO fix ugly hack of isCacheValueType + template ResultAndCacheStatus computeOnceImpl(bool pinned, const Key& key, - ComputeFunction computeFunction, + std::invocable auto computeFunction, bool onlyReadFromCache) { bool mustCompute; shared_ptr resultInProgress; @@ -356,6 +354,14 @@ class ConcurrentCache { try { // The actual computation shared_ptr result = make_shared(computeFunction()); + // TODO support storing generator in cache somehow + if constexpr (isCacheValueType) { + if (!result->resultTable()->isDataEvaluated()) { + // TODO use dedicated mechanism for this + resultInProgress->abort(); + return {std::move(result), CacheStatus::computed}; + } + } moveFromInProgressToCache(key, result); // Signal other threads who are waiting for the results. 
resultInProgress->finish(result); From ca1cbed2c38a21b4de41aaf419c07a5e89e9c191 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:54:18 +0200 Subject: [PATCH 005/133] Avoid another class of exceptions --- src/engine/ExportQueryExecutionTrees.cpp | 12 +++++- src/engine/Operation.cpp | 9 ++++- src/engine/QueryExecutionTree.cpp | 4 +- src/engine/Result.cpp | 51 +++++++++++++++++++----- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 2f3da341a7..55f02a8c41 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -31,6 +31,7 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle) { + // TODO handle export of generators correctly for (size_t i : getRowIndices(limitAndOffset, res->idTable())) { ConstructQueryExportContext context{i, *res, qet.getVariableColumns(), qet.getQec()->getIndex()}; @@ -112,6 +113,7 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); + // TODO handle export of generators correctly const IdTable& data = resultTable->idTable(); nlohmann::json json = nlohmann::json::array(); @@ -282,6 +284,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( std::erase(columns, std::nullopt); + // TODO handle export of generators correctly const IdTable& idTable = resultTable->idTable(); json result; @@ -424,6 +427,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); + // TODO handle export of generators correctly const auto& idTable = 
resultTable->idTable(); // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { @@ -575,6 +579,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: co_yield "\n"; resultTable->logResultSize(); + // TODO handle export of generators correctly const auto& idTable = resultTable->idTable(); auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); @@ -640,7 +645,10 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( resultTable->logResultSize(); auto timeResultComputation = requestTimer.msecs(); - size_t resultSize = resultTable->idTable().size(); + std::optional resultSize = + query.hasSelectClause() && resultTable->isDataEvaluated() + ? std::optional{resultTable->idTable().size()} + : std::nullopt; nlohmann::json j; @@ -676,7 +684,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( qet, query.constructClause().triples_, limitAndOffset, std::move(resultTable), std::move(cancellationHandle)); } - j["resultsize"] = query.hasSelectClause() ? resultSize : j["res"].size(); + j["resultsize"] = resultSize.value_or(j["res"].size()); j["time"]["total"] = std::to_string(requestTimer.msecs().count()) + "ms"; j["time"]["computeResult"] = std::to_string(timeResultComputation.count()) + "ms"; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 8fa66b9db4..ed547650f9 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -137,7 +137,10 @@ std::shared_ptr Operation::getResult(bool isRoot, // individual results, but that requires changes in each individual // operation, therefore we currently only perform this expensive // change in the DEBUG builds. + // This check doesn't make sense when the result has not been evaluated + // yet, so it should be moved into the operations eventually. 
AD_EXPENSIVE_CHECK( + result.isDataEvaluated() || result.checkDefinedness(getExternallyVisibleVariableColumns())); // Make sure that the results that are written to the cache have the // correct runtimeInfo. The children of the runtime info are already set @@ -155,7 +158,7 @@ std::shared_ptr Operation::getResult(bool isRoot, // runtime if neither a LIMIT nor an OFFSET were specified. result.applyLimitOffset(_limit); runtimeInfo().addLimitOffsetRow(_limit, limitTimer.msecs(), true); - } else { + } else if (result.isDataEvaluated()) { AD_CONTRACT_CHECK(result.idTable().numRows() == _limit.actualSize(result.idTable().numRows())); } @@ -231,7 +234,9 @@ void Operation::updateRuntimeInformationOnSuccess( const Result& resultTable, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) { _runtimeInfo->totalTime_ = duration; - _runtimeInfo->numRows_ = resultTable.idTable().size(); + // TODO replace 0 size with estimation or something + _runtimeInfo->numRows_ = + resultTable.isDataEvaluated() ? 
resultTable.idTable().size() : 0; _runtimeInfo->cacheStatus_ = cacheStatus; _runtimeInfo->status_ = RuntimeInformation::Status::fullyMaterialized; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 35aa938614..aa09bdf05f 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -83,7 +83,7 @@ size_t QueryExecutionTree::getCostEstimate() { // _____________________________________________________________________________ size_t QueryExecutionTree::getSizeEstimate() { if (!sizeEstimate_.has_value()) { - if (cachedResult_) { + if (cachedResult_ && cachedResult_->isDataEvaluated()) { sizeEstimate_ = cachedResult_->idTable().size(); } else { // if we are in a unit test setting and there is no QueryExecutionContest @@ -97,7 +97,7 @@ size_t QueryExecutionTree::getSizeEstimate() { // _____________________________________________________________________________ bool QueryExecutionTree::knownEmptyResult() { - if (cachedResult_) { + if (cachedResult_ && cachedResult_->isDataEvaluated()) { return cachedResult_->idTable().size() == 0; } return rootOperation_->knownEmptyResult(); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 738a453058..47ff945e62 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -64,13 +64,7 @@ Result::Result(TableType idTable, std::vector sortedBy, SharedLocalVocabWrapper{std::move(localVocab)}) {} // _____________________________________________________________________________ -void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { - // Apply the OFFSET clause. If the offset is `0` or the offset is larger - // than the size of the `IdTable`, then this has no effect and runtime - // `O(1)` (see the docs for `std::shift_left`). 
- // TODO handle generator case properly - AD_CONTRACT_CHECK(isDataEvaluated()); - auto& idTable = std::get<0>(_idTable); +void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { std::ranges::for_each( idTable.getColumns(), [offset = limitOffset.actualOffset(idTable.numRows()), @@ -85,15 +79,51 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { idTable.shrinkToFit(); } +// _____________________________________________________________________________ +void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { + // Apply the OFFSET clause. If the offset is `0` or the offset is larger + // than the size of the `IdTable`, then this has no effect and runtime + // `O(1)` (see the docs for `std::shift_left`). + // TODO make limit its own dedicated operation to avoid this + // modification here + AD_CONTRACT_CHECK(isDataEvaluated()); + using Gen = cppcoro::generator; + if (std::holds_alternative(_idTable)) { + modifyIdTable(std::get(_idTable), limitOffset); + } else if (std::holds_alternative(_idTable)) { + auto generator = [](Gen original, LimitOffsetClause limitOffset) -> Gen { + if (limitOffset._limit.value_or(1) == 0) { + co_return; + } + for (auto&& idTable : original) { + modifyIdTable(idTable, limitOffset); + uint64_t offsetDelta = limitOffset.actualOffset(idTable.numRows()); + limitOffset._offset -= offsetDelta; + if (limitOffset._limit.has_value()) { + limitOffset._limit.value() -= + limitOffset.actualSize(idTable.numRows() - offsetDelta); + } + if (limitOffset._offset == 0) { + co_yield std::move(idTable); + } + if (limitOffset._limit.value_or(1) == 0) { + break; + } + } + }(std::move(std::get(_idTable)), limitOffset); + _idTable = std::move(generator); + } else { + AD_FAIL(); + } +} + // _____________________________________________________________________________ auto Result::getOrComputeDatatypeCountsPerColumn() -> const DatatypeCountsPerColumn& { if (datatypeCountsPerColumn_.has_value()) { return 
datatypeCountsPerColumn_.value(); } - // TODO handle generator case properly - AD_CONTRACT_CHECK(isDataEvaluated()); - auto& idTable = std::get<0>(_idTable); + auto& idTable = std::get(_idTable); auto& types = datatypeCountsPerColumn_.emplace(); types.resize(idTable.numColumns()); for (size_t i = 0; i < idTable.numColumns(); ++i) { @@ -108,6 +138,7 @@ auto Result::getOrComputeDatatypeCountsPerColumn() // _____________________________________________________________ bool Result::checkDefinedness(const VariableToColumnMap& varColMap) { + AD_CONTRACT_CHECK(isDataEvaluated()); const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn(); return std::ranges::all_of(varColMap, [&](const auto& varAndCol) { const auto& [columnIndex, mightContainUndef] = varAndCol.second; From 9e7f3cb78dc102340b85f9f12924acb1dc8c9623 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 23 Apr 2024 18:33:43 +0200 Subject: [PATCH 006/133] Optimize imports --- src/engine/QueryPlanner.cpp | 3 ++- src/engine/QueryPlanner.h | 9 +++------ test/QueryPlannerTestHelpers.h | 6 ++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c743338a29..c96347057b 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -6,8 +6,9 @@ #include "engine/QueryPlanner.h" +#include + #include -#include #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index cb3c5deb92..1c44c02f5f 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -5,23 +5,20 @@ // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) #pragma once -#include + #include -#include "absl/strings/str_join.h" -#include "absl/strings/str_split.h" #include "engine/CheckUsePatternTrick.h" -#include "engine/Filter.h" #include "engine/QueryExecutionTree.h" #include "parser/GraphPattern.h" #include 
"parser/ParsedQuery.h" -using std::vector; - class QueryPlanner { using TextLimitMap = ad_utility::HashMap; using CancellationHandle = ad_utility::SharedCancellationHandle; + template + using vector = std::vector; public: explicit QueryPlanner(QueryExecutionContext* qec, diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index e73ab6b891..28d5c9086a 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -4,10 +4,14 @@ #pragma once +#include +#include + #include "./util/GTestHelpers.h" #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" #include "engine/CountAvailablePredicates.h" +#include "engine/Filter.h" #include "engine/IndexScan.h" #include "engine/Join.h" #include "engine/MultiColumnJoin.h" @@ -21,8 +25,6 @@ #include "engine/TextLimit.h" #include "engine/TransitivePathBase.h" #include "engine/Union.h" -#include "gmock/gmock-matchers.h" -#include "gmock/gmock.h" #include "parser/SparqlParser.h" #include "util/IndexTestHelpers.h" From 4c75d428831eaa260e6fddc68577d8bb844f57a2 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:51:01 +0200 Subject: [PATCH 007/133] Introduce ReusableGenerator class --- src/engine/Result.cpp | 14 ++-- src/engine/Result.h | 7 +- src/util/ReusableGenerator.h | 130 +++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 src/util/ReusableGenerator.h diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 47ff945e62..2c15a3b1f6 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -87,11 +87,13 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // TODO make limit its own dedicated operation to avoid this // modification here AD_CONTRACT_CHECK(isDataEvaluated()); - using Gen = cppcoro::generator; + using Gen = GeneratorType; if (std::holds_alternative(_idTable)) { modifyIdTable(std::get(_idTable), limitOffset); } else if 
(std::holds_alternative(_idTable)) { - auto generator = [](Gen original, LimitOffsetClause limitOffset) -> Gen { + auto generator = + [](cppcoro::generator original, + LimitOffsetClause limitOffset) -> cppcoro::generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } @@ -110,8 +112,8 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { break; } } - }(std::move(std::get(_idTable)), limitOffset); - _idTable = std::move(generator); + }(std::move(std::get(_idTable)).extractGenerator(), limitOffset); + _idTable.emplace(std::move(generator)); } else { AD_FAIL(); } @@ -156,11 +158,11 @@ const IdTable& Result::idTable() const { } // _____________________________________________________________________________ -cppcoro::generator& Result::idTables() { +Result::GeneratorType& Result::idTables() { // TODO Find out if scenarios exist where it makes // sense to return a generator with a single element here. AD_CONTRACT_CHECK(!isDataEvaluated()); - return std::get>(_idTable); + return std::get(_idTable); } // _____________________________________________________________________________ diff --git a/src/engine/Result.h b/src/engine/Result.h index 8927b29767..b267dd15eb 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,8 +15,8 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" -#include "util/Generator.h" #include "util/Log.h" +#include "util/ReusableGenerator.h" // The result of an `Operation`. This is the class QLever uses for all // intermediate or final results when processing a SPARQL query. The actual data @@ -24,7 +24,8 @@ class Result { private: // The actual entries. - using TableType = std::variant>; + using GeneratorType = ad_utility::ReusableGenerator; + using TableType = std::variant; TableType _idTable; // The column indices by which the result is sorted (primary sort key first). 
@@ -102,7 +103,7 @@ class Result { const IdTable& idTable() const; // Access to the underlying `IdTable`. - cppcoro::generator& idTables(); + GeneratorType& idTables(); // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { return _sortedBy; } diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h new file mode 100644 index 0000000000..a0b4e48ed7 --- /dev/null +++ b/src/util/ReusableGenerator.h @@ -0,0 +1,130 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#ifndef REUSABLEGENERATOR_H +#define REUSABLEGENERATOR_H + +#include +#include + +#include "util/Exception.h" +#include "util/Generator.h" +#include "util/Synchronized.h" + +namespace ad_utility { + +template +class ReusableGenerator { + using GenIterator = typename cppcoro::generator::iterator; + using Reference = typename GenIterator::reference; + using Pointer = typename GenIterator::pointer; + + class ComputationStorage { + friend ReusableGenerator; + cppcoro::generator generator_; + std::optional generatorIterator_{}; + std::vector cachedValues_{}; + + explicit ComputationStorage(cppcoro::generator generator) + : generator_{std::move(generator)} {} + + public: + ComputationStorage(ComputationStorage&& other) = default; + ComputationStorage(const ComputationStorage& other) = delete; + ComputationStorage& operator=(ComputationStorage&& other) = default; + ComputationStorage& operator=(const ComputationStorage& other) = delete; + + private: + void advanceTo(size_t index) { + AD_CONTRACT_CHECK(index <= cachedValues_.size()); + if (index != cachedValues_.size()) { + return; + } + if (generatorIterator_.has_value()) { + AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); + ++generatorIterator_.value(); + } else { + generatorIterator_ = generator_.begin(); + } + if (generatorIterator_.value() != generator_.end()) { + 
cachedValues_.emplace_back(std::move(*generatorIterator_)); + } + } + + Reference getCachedValue(size_t index) { return cachedValues_.at(index); } + + bool isDone(size_t index) const noexcept { + return index == cachedValues_.size() && generatorIterator_.has_value() && + generatorIterator_.value() == generator_.end(); + } + }; + std::shared_ptr> computationStorage_; + + public: + explicit ReusableGenerator(cppcoro::generator generator) + : computationStorage_{std::make_shared>( + ComputationStorage{std::move(generator)})} {} + + ReusableGenerator(ReusableGenerator&& other) = default; + ReusableGenerator(const ReusableGenerator& other) = delete; + ReusableGenerator& operator=(ReusableGenerator&& other) = default; + ReusableGenerator& operator=(const ReusableGenerator& other) = delete; + + class IteratorSentinel {}; + + class Iterator { + size_t currentIndex_ = 0; + std::weak_ptr> storage_; + + explicit Iterator(std::weak_ptr> storage) + : storage_{storage} { + storage_->advanceTo(currentIndex_); + } + + friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { + return !it.storage_.lock()->isDone(it.currentIndex_); + } + + friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { + return !(it == s); + } + + friend bool operator==(IteratorSentinel s, const Iterator& it) noexcept { + return (it == s); + } + + friend bool operator!=(IteratorSentinel s, const Iterator& it) noexcept { + return it != s; + } + + Iterator& operator++() { + ++currentIndex_; + storage_.lock()->advanceTo(currentIndex_); + return *this; + } + + // Need to provide post-increment operator to implement the 'Range' concept. 
+ void operator++(int) { (void)operator++(); } + + Reference operator*() const noexcept { + return storage_.lock()->getCachedValue(currentIndex_); + } + + Pointer operator->() const noexcept { return std::addressof(operator*()); } + }; + + Iterator begin() noexcept { return Iterator{}; } + + IteratorSentinel end() const noexcept { return IteratorSentinel{}; } + + cppcoro::generator extractGenerator() && { + auto lock = computationStorage_->wlock(); + cppcoro::generator result{std::move(lock->generator_)}; + computationStorage_.reset(); + return result; + } +}; +}; // namespace ad_utility + +#endif // REUSABLEGENERATOR_H From 892e4a5424be54219e4fb0f2589c6ad019070a37 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:59:12 +0200 Subject: [PATCH 008/133] Try to make caching work --- src/engine/QueryExecutionContext.h | 6 ++++-- src/engine/Result.cpp | 2 -- src/index/CompressedRelation.cpp | 10 +++++----- src/util/ConcurrentCache.h | 13 +------------ 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 77a67b41ba..8f65ae851a 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -38,9 +38,11 @@ class CacheValue { // Calculates the `MemorySize` taken up by an instance of `CacheValue`. 
struct SizeGetter { - // TODO Ensure this is only called for fully materialized results ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { - if (const auto& tablePtr = cacheValue._resultTable; tablePtr) { + // TODO find good solution how to calculate storage requirements + // for generator data + if (const auto& tablePtr = cacheValue._resultTable; + tablePtr && tablePtr->isDataEvaluated()) { return ad_utility::MemorySize::bytes(tablePtr->idTable().size() * tablePtr->idTable().numColumns() * sizeof(Id)); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 2c15a3b1f6..cf33d1ab73 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -159,8 +159,6 @@ const IdTable& Result::idTable() const { // _____________________________________________________________________________ Result::GeneratorType& Result::idTables() { - // TODO Find out if scenarios exist where it makes - // sense to return a generator with a single element here. AD_CONTRACT_CHECK(!isDataEvaluated()); return std::get(_idTable); } diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 06d50f90c9..498cf741f4 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -436,11 +436,11 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_; auto sharedResultFromCache = blockCache_ - .computeOnce(cacheKey, - [&]() { - return readAndDecompressBlock(blockMetadata, - allColumns); - }) + .computeOnce(cacheKey, + [&]() { + return readAndDecompressBlock(blockMetadata, + allColumns); + }) ._resultPointer; const DecompressedBlock& block = *sharedResultFromCache; diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 684334abb6..ef6d289712 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -180,11 +180,10 @@ class ConcurrentCache { * @return A shared_ptr to the computation 
result. * */ - template ResultAndCacheStatus computeOnce(const Key& key, std::invocable auto computeFunction, bool onlyReadFromCache = false) { - return computeOnceImpl( + return computeOnceImpl( false, key, std::move(computeFunction), onlyReadFromCache); } @@ -308,8 +307,6 @@ class ConcurrentCache { private: // implementation for computeOnce (pinned and normal variant). - // TODO fix ugly hack of isCacheValueType - template ResultAndCacheStatus computeOnceImpl(bool pinned, const Key& key, std::invocable auto computeFunction, bool onlyReadFromCache) { @@ -354,14 +351,6 @@ class ConcurrentCache { try { // The actual computation shared_ptr result = make_shared(computeFunction()); - // TODO support storing generator in cache somehow - if constexpr (isCacheValueType) { - if (!result->resultTable()->isDataEvaluated()) { - // TODO use dedicated mechanism for this - resultInProgress->abort(); - return {std::move(result), CacheStatus::computed}; - } - } moveFromInProgressToCache(key, result); // Signal other threads who are waiting for the results. 
resultInProgress->finish(result); From 586365c8e31a4469e9869e1c8ad37e211940fdaf Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 23 Apr 2024 23:41:08 +0200 Subject: [PATCH 009/133] Fiddle around with const a bit --- src/engine/QueryExecutionContext.h | 3 ++- src/engine/Result.cpp | 3 ++- src/engine/Result.h | 5 ++--- src/util/ConcurrentCache.h | 5 +++-- src/util/ReusableGenerator.h | 10 ++++++---- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 8f65ae851a..519bd47598 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -40,7 +40,8 @@ class CacheValue { struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { // TODO find good solution how to calculate storage requirements - // for generator data + // for generator data, maybe allow later re-calculation current size by + // returning lambda? 
if (const auto& tablePtr = cacheValue._resultTable; tablePtr && tablePtr->isDataEvaluated()) { return ad_utility::MemorySize::bytes(tablePtr->idTable().size() * diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index cf33d1ab73..7bc045cbb4 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -8,6 +8,7 @@ #include "engine/LocalVocab.h" #include "util/Exception.h" +#include "util/Log.h" // _____________________________________________________________________________ string Result::asDebugString() const { @@ -158,7 +159,7 @@ const IdTable& Result::idTable() const { } // _____________________________________________________________________________ -Result::GeneratorType& Result::idTables() { +const Result::GeneratorType& Result::idTables() const { AD_CONTRACT_CHECK(!isDataEvaluated()); return std::get(_idTable); } diff --git a/src/engine/Result.h b/src/engine/Result.h index b267dd15eb..398273b9ec 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,7 +15,6 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" -#include "util/Log.h" #include "util/ReusableGenerator.h" // The result of an `Operation`. This is the class QLever uses for all @@ -102,8 +101,8 @@ class Result { // Const access to the underlying `IdTable`. const IdTable& idTable() const; - // Access to the underlying `IdTable`. - GeneratorType& idTables(); + // Access to the underlying `IdTable`s. + const GeneratorType& idTables() const; // Const access to the columns by which the `idTable()` is sorted. 
const std::vector& sortedBy() const { return _sortedBy; } diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index ef6d289712..0bbed614ad 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -183,8 +183,8 @@ class ConcurrentCache { ResultAndCacheStatus computeOnce(const Key& key, std::invocable auto computeFunction, bool onlyReadFromCache = false) { - return computeOnceImpl( - false, key, std::move(computeFunction), onlyReadFromCache); + return computeOnceImpl(false, key, std::move(computeFunction), + onlyReadFromCache); } /// Similar to computeOnce, with the following addition: After the call @@ -328,6 +328,7 @@ class ConcurrentCache { } else if (onlyReadFromCache) { return {nullptr, CacheStatus::notInCacheAndNotComputed}; } else if (lockPtr->_inProgress.contains(key)) { + // TODO serialize into single IdTable if partially computed // the result is not cached, but someone else is computing it. // it is important, that we do not immediately call getResult() since // this call blocks and we currently hold a lock. 
diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index a0b4e48ed7..49b50ad99d 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -17,8 +17,8 @@ namespace ad_utility { template class ReusableGenerator { using GenIterator = typename cppcoro::generator::iterator; - using Reference = typename GenIterator::reference; - using Pointer = typename GenIterator::pointer; + using Reference = const T&; + using Pointer = const T*; class ComputationStorage { friend ReusableGenerator; @@ -52,7 +52,9 @@ class ReusableGenerator { } } - Reference getCachedValue(size_t index) { return cachedValues_.at(index); } + Reference getCachedValue(size_t index) const { + return cachedValues_.at(index); + } bool isDone(size_t index) const noexcept { return index == cachedValues_.size() && generatorIterator_.has_value() && @@ -114,7 +116,7 @@ class ReusableGenerator { Pointer operator->() const noexcept { return std::addressof(operator*()); } }; - Iterator begin() noexcept { return Iterator{}; } + Iterator begin() const noexcept { return Iterator{computationStorage_}; } IteratorSentinel end() const noexcept { return IteratorSentinel{}; } From 80e2dbdbf29530473cac2b39a4cc699fe6dba26e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 24 Apr 2024 00:29:36 +0200 Subject: [PATCH 010/133] Add more TODOs --- src/engine/Result.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 7bc045cbb4..c1ad9e2158 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -41,9 +41,11 @@ Result::Result(TableType idTable, std::vector sortedBy, _sortedBy{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); - AD_CONTRACT_CHECK(std::ranges::all_of(_sortedBy, [this](size_t numCols) { - return numCols < this->idTable().numColumns(); - })); + // TODO move checks into generators if 
possible + AD_CONTRACT_CHECK(!isDataEvaluated() || + std::ranges::all_of(_sortedBy, [this](size_t numCols) { + return numCols < this->idTable().numColumns(); + })); [[maybe_unused]] auto compareRowsByJoinColumns = [this](const auto& row1, const auto& row2) { @@ -55,6 +57,7 @@ Result::Result(TableType idTable, std::vector sortedBy, return false; }; AD_EXPENSIVE_CHECK( + !isDataEvaluated() || std::ranges::is_sorted(this->idTable(), compareRowsByJoinColumns)); } From 18ca5b1278b0cd3984e99249322a0cd13f8559f0 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:10:06 +0200 Subject: [PATCH 011/133] Fix TextLimit code after rebase --- src/engine/TextLimit.cpp | 7 ++++--- src/engine/TextLimit.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/engine/TextLimit.cpp b/src/engine/TextLimit.cpp index 4b7eff3455..0dbb2dd835 100644 --- a/src/engine/TextLimit.cpp +++ b/src/engine/TextLimit.cpp @@ -18,11 +18,12 @@ TextLimit::TextLimit(QueryExecutionContext* qec, const size_t limit, scoreColumns_(scoreColumns) {} // _____________________________________________________________________________ -ResultTable TextLimit::computeResult() { - shared_ptr childRes = child_->getResult(); +Result TextLimit::computeResult([[maybe_unused]] bool requestLazyness) { + std::shared_ptr childRes = child_->getResult(); if (limit_ == 0) { - return {IdTable(childRes->width(), getExecutionContext()->getAllocator()), + return {IdTable(childRes->idTable().numColumns(), + getExecutionContext()->getAllocator()), resultSortedOn(), childRes->getSharedLocalVocab()}; } diff --git a/src/engine/TextLimit.h b/src/engine/TextLimit.h index 97f930674a..7d1c91bf6c 100644 --- a/src/engine/TextLimit.h +++ b/src/engine/TextLimit.h @@ -5,6 +5,7 @@ #pragma once #include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" // This class implements the TextLimit operation. 
It limits the number of texts // that are returned for each unique entity combination. The texts are selected @@ -61,7 +62,7 @@ class TextLimit : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - ResultTable computeResult() override; + Result computeResult([[maybe_unused]] bool requestLazyness) override; vector getChildren() override { return {child_.get()}; } }; From 86a9f4baee5e4e08b55dc399a9e0a04c98f514dd Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:30:11 +0200 Subject: [PATCH 012/133] Fix compilation issues for ReusableGenerator --- src/util/ReusableGenerator.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 49b50ad99d..94a9e567c6 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -48,7 +48,7 @@ class ReusableGenerator { generatorIterator_ = generator_.begin(); } if (generatorIterator_.value() != generator_.end()) { - cachedValues_.emplace_back(std::move(*generatorIterator_)); + cachedValues_.emplace_back(std::move(*generatorIterator_.value())); } } @@ -56,7 +56,7 @@ class ReusableGenerator { return cachedValues_.at(index); } - bool isDone(size_t index) const noexcept { + bool isDone(size_t index) noexcept { return index == cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } @@ -79,13 +79,14 @@ class ReusableGenerator { size_t currentIndex_ = 0; std::weak_ptr> storage_; + public: explicit Iterator(std::weak_ptr> storage) : storage_{storage} { - storage_->advanceTo(currentIndex_); + storage_.lock()->wlock()->advanceTo(currentIndex_); } friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return !it.storage_.lock()->isDone(it.currentIndex_); + return !it.storage_.lock()->wlock()->isDone(it.currentIndex_); } friend bool operator!=(const Iterator& it, 
IteratorSentinel s) noexcept { @@ -99,10 +100,9 @@ class ReusableGenerator { friend bool operator!=(IteratorSentinel s, const Iterator& it) noexcept { return it != s; } - Iterator& operator++() { ++currentIndex_; - storage_.lock()->advanceTo(currentIndex_); + storage_.lock()->wlock()->advanceTo(currentIndex_); return *this; } @@ -110,7 +110,7 @@ class ReusableGenerator { void operator++(int) { (void)operator++(); } Reference operator*() const noexcept { - return storage_.lock()->getCachedValue(currentIndex_); + return storage_.lock()->rlock()->getCachedValue(currentIndex_); } Pointer operator->() const noexcept { return std::addressof(operator*()); } From 7f0a5e744b83556831f06f089bbb8b9dbe219eb6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 29 Apr 2024 00:08:44 +0200 Subject: [PATCH 013/133] Remove offset calculations from exporter --- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 120 ++++++++---------- src/engine/ExportQueryExecutionTrees.h | 24 +--- src/engine/Operation.h | 3 +- src/engine/QueryPlanner.cpp | 4 +- src/engine/Result.cpp | 1 + src/engine/Server.cpp | 28 ++-- src/engine/Server.h | 3 +- src/parser/data/ConstructQueryExportContext.h | 3 +- src/parser/data/Variable.cpp | 5 +- test/ExportQueryExecutionTreeTest.cpp | 6 +- test/SparqlDataTypesTest.cpp | 3 +- 12 files changed, 98 insertions(+), 104 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 53f6ccde8d..95e18e5eb9 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -149,7 +149,7 @@ Result CartesianProductJoin::computeResult([[maybe_unused]] bool requestLazyness // Get all child results (possibly with limit, see above). 
for (auto& child : childView()) { - if (limitIfPresent.has_value() && child.supportsLimit()) { + if (limitIfPresent.has_value()) { child.setLimit(limitIfPresent.value()); } subResults.push_back(child.getResult()); diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 55f02a8c41..f88f86f273 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -14,13 +14,28 @@ // __________________________________________________________________________ namespace { -// Return a range that contains the indices of the rows that have to be exported -// from the `idTable` given the `LimitOffsetClause`. It takes into account the -// LIMIT, the OFFSET, and the actual size of the `idTable` -auto getRowIndices(const LimitOffsetClause& limitOffset, - const IdTable& idTable) { - return std::views::iota(limitOffset.actualOffset(idTable.size()), - limitOffset.upperBound(idTable.size())); + +struct IndexWithTable { + size_t index_; + const IdTable& idTable_; +}; + +cppcoro::generator getIdTables(const Result& result) { + if (result.isDataEvaluated()) { + co_yield result.idTable(); + } else { + for (const IdTable& idTable : result.idTables()) { + co_yield idTable; + } + } +} + +cppcoro::generator getRowIndices(const Result& result) { + for (const IdTable& idTable : getIdTables(result)) { + for (size_t index = 0; index < idTable.numRows(); index++) { + co_yield {index, idTable}; + } + } } } // namespace @@ -29,11 +44,11 @@ cppcoro::generator ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, + std::shared_ptr result, CancellationHandle cancellationHandle) { - // TODO handle export of generators correctly - for (size_t i : getRowIndices(limitAndOffset, res->idTable())) { - ConstructQueryExportContext context{i, *res, qet.getVariableColumns(), + for 
(auto [i, idTable] : getRowIndices(*result)) { + ConstructQueryExportContext context{i, idTable, result->localVocab(), + qet.getVariableColumns(), qet.getQec()->getIndex()}; using enum PositionInTriple; for (const auto& triple : constructTriples) { @@ -57,13 +72,11 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { resultTable->logResultSize(); auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, resultTable, - std::move(cancellationHandle)); + qet, constructTriples, resultTable, std::move(cancellationHandle)); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -92,11 +105,9 @@ nlohmann::json ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - const LimitOffsetClause& limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle) { - auto generator = constructQueryResultToTriples(qet, constructTriples, - limitAndOffset, std::move(res), - std::move(cancellationHandle)); + std::shared_ptr res, CancellationHandle cancellationHandle) { + auto generator = constructQueryResultToTriples( + qet, constructTriples, std::move(res), std::move(cancellationHandle)); std::vector> jsonArray; for (auto& triple : generator) { jsonArray.push_back({std::move(triple.subject_), @@ -108,16 +119,14 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( // __________________________________________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( - const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, + const 
QueryExecutionTree& qet, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr resultTable, + std::shared_ptr result, CancellationHandle cancellationHandle) { - AD_CORRECTNESS_CHECK(resultTable != nullptr); - // TODO handle export of generators correctly - const IdTable& data = resultTable->idTable(); + AD_CORRECTNESS_CHECK(result != nullptr); nlohmann::json json = nlohmann::json::array(); - for (size_t rowIndex : getRowIndices(limitAndOffset, data)) { + for (auto [rowIndex, idTable] : getRowIndices(*result)) { // We need the explicit `array` constructor for the special case of zero // variables. json.push_back(nlohmann::json::array()); @@ -127,9 +136,9 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( row.emplace_back(nullptr); continue; } - const auto& currentId = data(rowIndex, opt->columnIndex_); + const auto& currentId = idTable(rowIndex, opt->columnIndex_); const auto& optionalStringAndXsdType = idToStringAndType( - qet.getQec()->getIndex(), currentId, resultTable->localVocab()); + qet.getQec()->getIndex(), currentId, result->localVocab()); if (!optionalStringAndXsdType.has_value()) { row.emplace_back(nullptr); continue; @@ -268,7 +277,6 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { using nlohmann::json; @@ -284,9 +292,6 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( std::erase(columns, std::nullopt); - // TODO handle export of generators correctly - const IdTable& idTable = resultTable->idTable(); - json result; std::vector selectedVars = selectClause.getSelectedVariablesAsStrings(); @@ -354,7 +359,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for 
(size_t rowIndex : getRowIndices(limitAndOffset, idTable)) { + for (auto [rowIndex, idTable] : getRowIndices(*resultTable)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. nlohmann::ordered_json binding; @@ -389,7 +394,6 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); @@ -398,7 +402,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( qet.selectedVariablesToColumnIndices(selectClause, true); return ExportQueryExecutionTrees::idTableToQLeverJSONArray( - qet, limitAndOffset, selectedColumnIndices, std::move(resultTable), + qet, selectedColumnIndices, std::move(resultTable), std::move(cancellationHandle)); } @@ -410,7 +414,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { + CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::turtle); @@ -427,11 +431,9 @@ ExportQueryExecutionTrees::selectQueryResultToStream( auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); - // TODO handle export of generators correctly - const auto& idTable = resultTable->idTable(); // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (auto [i, idTable] : getRowIndices(*resultTable)) { for (const auto& columnIndex : 
selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -459,7 +461,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (auto [i, idTable] : getRowIndices(*resultTable)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -555,7 +557,6 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { using namespace std::string_view_literals; co_yield "\n" @@ -579,12 +580,10 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: co_yield "\n"; resultTable->logResultSize(); - // TODO handle export of generators correctly - const auto& idTable = resultTable->idTable(); auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. 
- for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (auto [i, idTable] : getRowIndices(*resultTable)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -609,7 +608,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -624,8 +623,7 @@ ExportQueryExecutionTrees::constructQueryResultToStream( : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? '\t' : ','; auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, resultTable, - std::move(cancellationHandle)); + qet, constructTriples, resultTable, std::move(cancellationHandle)); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -639,7 +637,7 @@ ExportQueryExecutionTrees::constructQueryResultToStream( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, + const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); @@ -672,16 +670,14 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( nlohmann::ordered_json(runtimeInformation); { - auto limitAndOffset = query._limitOffset; - limitAndOffset._limit = std::min(limitAndOffset.limitOrDefault(), maxSend); j["res"] = 
query.hasSelectClause() ? ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( - qet, query.selectClause(), limitAndOffset, - std::move(resultTable), std::move(cancellationHandle)) + qet, query.selectClause(), std::move(resultTable), + std::move(cancellationHandle)) : ExportQueryExecutionTrees:: constructQueryResultBindingsToQLeverJSON( - qet, query.constructClause().triples_, limitAndOffset, + qet, query.constructClause().triples_, std::move(resultTable), std::move(cancellationHandle)); } j["resultsize"] = resultSize.value_or(j["res"].size()); @@ -698,15 +694,13 @@ ExportQueryExecutionTrees::computeResultAsStream( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, ad_utility::MediaType mediaType, CancellationHandle cancellationHandle) { auto compute = [&] { - auto limitAndOffset = parsedQuery._limitOffset; return parsedQuery.hasSelectClause() ? ExportQueryExecutionTrees::selectQueryResultToStream( - qet, parsedQuery.selectClause(), limitAndOffset, + qet, parsedQuery.selectClause(), std::move(cancellationHandle)) : ExportQueryExecutionTrees::constructQueryResultToStream< format>(qet, parsedQuery.constructClause().triples_, - limitAndOffset, qet.getResult(), - std::move(cancellationHandle)); + qet.getResult(), std::move(cancellationHandle)); }; using enum MediaType; @@ -725,7 +719,7 @@ ExportQueryExecutionTrees::computeResultAsStream( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( - const ParsedQuery& query, const QueryExecutionTree& qet, uint64_t maxSend, + const ParsedQuery& query, const QueryExecutionTree& qet, CancellationHandle cancellationHandle) { if (!query.hasSelectClause()) { AD_THROW( @@ -734,10 +728,8 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); nlohmann::json j; - auto limitAndOffset = 
query._limitOffset; - limitAndOffset._limit = std::min(limitAndOffset.limitOrDefault(), maxSend); j = ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( - qet, query.selectClause(), limitAndOffset, std::move(resultTable), + qet, query.selectClause(), std::move(resultTable), std::move(cancellationHandle)); return j; } @@ -745,17 +737,17 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeResultAsJSON( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, - ad_utility::MediaType mediaType, CancellationHandle cancellationHandle) { + const ad_utility::Timer& requestTimer, ad_utility::MediaType mediaType, + CancellationHandle cancellationHandle) { try { switch (mediaType) { case ad_utility::MediaType::qleverJson: return computeQueryResultAsQLeverJSON(parsedQuery, qet, requestTimer, - maxSend, + std::move(cancellationHandle)); case ad_utility::MediaType::sparqlJson: return computeSelectQueryResultAsSparqlJSON( - parsedQuery, qet, maxSend, std::move(cancellationHandle)); + parsedQuery, qet, std::move(cancellationHandle)); default: AD_FAIL(); } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index e3a22afead..46699171d6 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -3,7 +3,6 @@ // Author: Johannes Kalmbach #include "engine/QueryExecutionTree.h" -#include "parser/data/LimitOffsetClause.h" #include "util/CancellationHandle.h" #include "util/http/MediaTypes.h" #include "util/json.h" @@ -49,11 +48,10 @@ class ExportQueryExecutionTrees { // single JSON object that is fully materialized before the function returns. // The `requestTimer` is used to report timing statistics on the query. 
It // must have already run during the query planning to produce the expected - // results. If `maxSend` is smaller than the size of the query result, then - // only the first `maxSend` rows are returned. + // results. static nlohmann::json computeResultAsJSON( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, + const ad_utility::Timer& requestTimer, MediaType mediaType, CancellationHandle cancellationHandle); // Convert the `id` to a human-readable string. The `index` is used to resolve @@ -97,18 +95,17 @@ class ExportQueryExecutionTrees { // Similar to `queryToJSON`, but always returns the `QLeverJSON` format. static nlohmann::json computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, + const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle); // Similar to `queryToJSON`, but always returns the `SparqlJSON` format. static nlohmann::json computeSelectQueryResultAsSparqlJSON( - const ParsedQuery& query, const QueryExecutionTree& qet, uint64_t maxSend, + const ParsedQuery& query, const QueryExecutionTree& qet, CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -118,9 +115,6 @@ class ExportQueryExecutionTrees { * `computeQueryResultAsQLeverJSON` to obtain the "actual" query results * (without the meta data) * @param qet The `QueryExecutionTree` of the query. 
- * @param from the first entries of the idTable are skipped - * @param limitAndOffset at most entries are written, starting at - * * @param columns each pair of tells * us which columns are to be serialized in which order * @param resultTable The query result in the ID space. If it is `nullptr`, @@ -128,7 +122,7 @@ class ExportQueryExecutionTrees { * @return a 2D-Json array corresponding to the IdTable given the arguments */ static nlohmann::json idTableToQLeverJSONArray( - const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, + const QueryExecutionTree& qet, const QueryExecutionTree::ColumnIndicesAndTypes& columns, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -137,7 +131,6 @@ class ExportQueryExecutionTrees { static nlohmann::json constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - const LimitOffsetClause& limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. 
@@ -145,14 +138,12 @@ class ExportQueryExecutionTrees { constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle); + std::shared_ptr res, CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -161,7 +152,6 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -170,5 +160,5 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + CancellationHandle cancellationHandle); }; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 7e30237acd..3cc2f6e819 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -165,7 +165,8 @@ class Operation { void recursivelySetTimeConstraint( std::chrono::steady_clock::time_point deadline); - // True iff this operation directly implement a `LIMIT` clause on its result. + // True iff this operation directly implement a `OFFSET` and `LIMIT` clause on + // its result. 
[[nodiscard]] virtual bool supportsLimit() const { return false; } // Set the value of the `LIMIT` clause that will be applied to the result of diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c96347057b..3f401dc903 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -153,9 +153,7 @@ std::vector QueryPlanner::createExecutionTrees( vector& lastRow = plans.back(); for (auto& plan : lastRow) { - if (plan._qet->getRootOperation()->supportsLimit()) { - plan._qet->getRootOperation()->setLimit(pq._limitOffset); - } + plan._qet->getRootOperation()->setLimit(pq._limitOffset); } AD_CONTRACT_CHECK(!lastRow.empty()); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index c1ad9e2158..b738194505 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -83,6 +83,7 @@ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { idTable.shrinkToFit(); } +// TODO add unit tests for this // _____________________________________________________________________________ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index d0940213eb..2721337a6c 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -616,8 +616,6 @@ boost::asio::awaitable Server::processQuery( const std::string& expected) { return params.contains(param) && params.at(param) == expected; }; - size_t maxSend = params.contains("send") ? std::stoul(params.at("send")) - : MAX_NOF_ROWS_IN_RESULT; const bool pinSubtrees = containsParam("pinsubtrees", "true"); const bool pinResult = containsParam("pinresult", "true"); LOG(INFO) << "Processing the following SPARQL query:" @@ -647,6 +645,14 @@ boost::asio::awaitable Server::processQuery( } else if (containsParam("action", "binary_export")) { mediaType = MediaType::octetStream; } + std::optional maxSend = + params.contains("send") ? 
std::optional{std::stoul(params.at("send"))} + : std::nullopt; + // Limit JSON requests by default + if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson || + mediaType == MediaType::qleverJson)) { + maxSend = MAX_NOF_ROWS_IN_RESULT; + } std::string_view acceptHeader = request.base()[http::field::accept]; @@ -683,8 +689,8 @@ boost::asio::awaitable Server::processQuery( auto [cancellationHandle, cancelTimeoutOnDestruction] = setupCancellationHandle(messageSender.getQueryId(), timeLimit); - plannedQuery = - co_await parseAndPlan(query, qec, cancellationHandle, timeLimit); + plannedQuery = co_await parseAndPlan(query, qec, cancellationHandle, + timeLimit, maxSend); AD_CORRECTNESS_CHECK(plannedQuery.has_value()); auto& qet = plannedQuery.value().queryExecutionTree_; qet.isRoot() = true; // allow pinning of the final result @@ -713,10 +719,10 @@ boost::asio::awaitable Server::processQuery( case sparqlJson: { // Normal case: JSON response auto responseString = co_await computeInNewThread( - [&plannedQuery, &qet, &requestTimer, maxSend, mediaType, + [&plannedQuery, &qet, &requestTimer, mediaType, &cancellationHandle] { return ExportQueryExecutionTrees::computeResultAsJSON( - plannedQuery.value().parsedQuery_, qet, requestTimer, maxSend, + plannedQuery.value().parsedQuery_, qet, requestTimer, mediaType.value(), cancellationHandle); }, cancellationHandle); @@ -820,7 +826,8 @@ Awaitable Server::computeInNewThread(Function function, // _____________________________________________________________________________ net::awaitable> Server::parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit) { + SharedCancellationHandle handle, TimeLimit timeLimit, + std::optional maxSend) { auto handleCopy = handle; // The usage of an `optional` here is required because of a limitation in @@ -830,9 +837,12 @@ net::awaitable> Server::parseAndPlan( // probably related to issues in GCC's coroutine implementation. 
return computeInNewThread( [&query, &qec, enablePatternTrick = enablePatternTrick_, - handle = std::move(handle), - timeLimit]() mutable -> std::optional { + handle = std::move(handle), timeLimit, + maxSend]() mutable -> std::optional { auto pq = SparqlParser::parseQuery(query); + if (maxSend.has_value() && !pq._limitOffset._limit.has_value()) { + pq._limitOffset._limit = maxSend.value(); + } handle->throwIfCancelled(); QueryPlanner qp(&qec, handle); qp.setEnablePatternTrick(enablePatternTrick); diff --git a/src/engine/Server.h b/src/engine/Server.h index 4c3deb0d67..0a56a712f0 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -183,7 +183,8 @@ class Server { /// technical reasons that are described in the definition of this function. net::awaitable> parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit); + SharedCancellationHandle handle, TimeLimit timeLimit, + std::optional maxSend); /// Acquire the `CancellationHandle` for the given `QueryId`, start the /// watchdog and call `cancelAfterDeadline` to set the timeout after diff --git a/src/parser/data/ConstructQueryExportContext.h b/src/parser/data/ConstructQueryExportContext.h index 253e8614bb..359282c326 100644 --- a/src/parser/data/ConstructQueryExportContext.h +++ b/src/parser/data/ConstructQueryExportContext.h @@ -18,7 +18,8 @@ enum struct PositionInTriple : int { SUBJECT, PREDICATE, OBJECT }; // All the data that is needed to evaluate an element in a construct query. 
struct ConstructQueryExportContext { const size_t _row; - const Result& _res; + const IdTable& idTable_; + const LocalVocab& localVocab_; const VariableToColumnMap& _variableColumns; const Index& _qecIndex; }; diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 8b7a3207e7..c8835731cd 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -29,15 +29,14 @@ Variable::Variable(std::string name) : _name{std::move(name)} { // Call stack. Most notably the check which columns belongs to this variable // should be much further up in the call stack. size_t row = context._row; - const Result& res = context._res; const auto& variableColumns = context._variableColumns; const Index& qecIndex = context._qecIndex; - const auto& idTable = res.idTable(); + const auto& idTable = context.idTable_; if (variableColumns.contains(*this)) { size_t index = variableColumns.at(*this).columnIndex_; auto id = idTable(row, index); auto optionalStringAndType = ExportQueryExecutionTrees::idToStringAndType( - qecIndex, id, res.localVocab()); + qecIndex, id, context.localVocab_); if (!optionalStringAndType.has_value()) { return std::nullopt; } diff --git a/test/ExportQueryExecutionTreeTest.cpp b/test/ExportQueryExecutionTreeTest.cpp index 139c480916..0c993740a4 100644 --- a/test/ExportQueryExecutionTreeTest.cpp +++ b/test/ExportQueryExecutionTreeTest.cpp @@ -57,7 +57,7 @@ nlohmann::json runJSONQuery(const std::string& kg, const std::string& query, auto qet = qp.createExecutionTree(pq); ad_utility::Timer timer{ad_utility::Timer::Started}; return ExportQueryExecutionTrees::computeResultAsJSON( - pq, qet, timer, 200, mediaType, std::move(cancellationHandle)); + pq, qet, timer, mediaType, std::move(cancellationHandle)); } // A test case that tests the correct execution and exporting of a SELECT query @@ -862,8 +862,8 @@ TEST_P(JsonMediaTypesFixture, CancellationCancelsJson) { cancellationHandle->cancel(ad_utility::CancellationState::MANUAL); 
AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( ExportQueryExecutionTrees::computeResultAsJSON( - pq, qet, ad_utility::Timer{ad_utility::Timer::Started}, 200, - GetParam(), std::move(cancellationHandle)), + pq, qet, ad_utility::Timer{ad_utility::Timer::Started}, GetParam(), + std::move(cancellationHandle)), HasSubstr("Query export"), ad_utility::CancellationException); } INSTANTIATE_TEST_SUITE_P(JsonMediaTypes, JsonMediaTypesFixture, diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index e4cf4279fe..a84e39f8d4 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -22,7 +22,8 @@ struct ContextWrapper { VariableToColumnMap _hashMap{}; ConstructQueryExportContext createContextForRow(size_t row) const { - return {row, _resultTable, _hashMap, _index}; + return {row, _resultTable.idTable(), _resultTable.localVocab(), _hashMap, + _index}; } void setIdTable(IdTable&& table) { From aee20dd35685cbe57ab2a0b84223e3d248a7a164 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 29 Apr 2024 00:10:52 +0200 Subject: [PATCH 014/133] Fix typo --- src/engine/Bind.cpp | 2 +- src/engine/Bind.h | 2 +- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/CartesianProductJoin.h | 2 +- src/engine/CountAvailablePredicates.cpp | 2 +- src/engine/CountAvailablePredicates.h | 2 +- src/engine/Distinct.cpp | 2 +- src/engine/Distinct.h | 2 +- src/engine/Filter.cpp | 2 +- src/engine/Filter.h | 2 +- src/engine/GroupBy.cpp | 2 +- src/engine/GroupBy.h | 2 +- src/engine/HasPredicateScan.cpp | 2 +- src/engine/HasPredicateScan.h | 2 +- src/engine/IndexScan.cpp | 2 +- src/engine/IndexScan.h | 2 +- src/engine/Join.cpp | 2 +- src/engine/Join.h | 2 +- src/engine/Minus.cpp | 2 +- src/engine/Minus.h | 2 +- src/engine/MultiColumnJoin.cpp | 2 +- src/engine/MultiColumnJoin.h | 2 +- src/engine/NeutralElementOperation.h | 2 +- src/engine/Operation.cpp | 10 +++++----- src/engine/Operation.h | 8 ++++---- src/engine/OptionalJoin.cpp | 2 
+- src/engine/OptionalJoin.h | 2 +- src/engine/OrderBy.cpp | 2 +- src/engine/OrderBy.h | 2 +- src/engine/QueryExecutionTree.h | 4 ++-- src/engine/Service.cpp | 2 +- src/engine/Service.h | 2 +- src/engine/Sort.cpp | 2 +- src/engine/Sort.h | 2 +- src/engine/TextIndexScanForEntity.cpp | 2 +- src/engine/TextIndexScanForEntity.h | 2 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/engine/TextIndexScanForWord.h | 2 +- src/engine/TextLimit.cpp | 2 +- src/engine/TextLimit.h | 2 +- src/engine/TransitivePathImpl.h | 2 +- src/engine/Union.cpp | 2 +- src/engine/Union.h | 2 +- src/engine/Values.cpp | 2 +- src/engine/Values.h | 2 +- test/engine/ValuesForTesting.h | 2 +- test/util/OperationTestHelpers.h | 4 ++-- 47 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 67650c22c5..3f7af15c71 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -81,7 +81,7 @@ std::vector Bind::getChildren() { } // _____________________________________________________________________________ -Result Bind::computeResult([[maybe_unused]] bool requestLazyness) { +Result Bind::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Get input to BIND operation..." << endl; std::shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 8d2128e900..f4a298c214 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -46,7 +46,7 @@ class Bind : public Operation { [[nodiscard]] vector resultSortedOn() const override; private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; // Implementation for the binding of arbitrary expressions. 
template diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 95e18e5eb9..86172967bf 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -132,7 +132,7 @@ void CartesianProductJoin::writeResultColumn(std::span targetColumn, } } // ____________________________________________________________________________ -Result CartesianProductJoin::computeResult([[maybe_unused]] bool requestLazyness) { +Result CartesianProductJoin::computeResult([[maybe_unused]] bool requestLaziness) { IdTable result{getExecutionContext()->getAllocator()}; result.setNumColumns(getResultWidth()); std::vector> subResults; diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 64fbf839d0..872698f733 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -79,7 +79,7 @@ class CartesianProductJoin : public Operation { private: //! Compute the result of the query-subtree rooted at this element.. - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; // Copy each element from the `inputColumn` `groupSize` times to the // `targetColumn`. Repeat until the `targetColumn` is copletely filled. Skip diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 4a024e2740..78c1f6ae5a 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -100,7 +100,7 @@ size_t CountAvailablePredicates::getCostEstimate() { } // _____________________________________________________________________________ -Result CountAvailablePredicates::computeResult([[maybe_unused]] bool requestLazyness) { +Result CountAvailablePredicates::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "CountAvailablePredicates result computation..." 
<< std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(2); diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h index 6664aae161..4565e031eb 100644 --- a/src/engine/CountAvailablePredicates.h +++ b/src/engine/CountAvailablePredicates.h @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation { void computePatternTrickAllEntities( IdTable* result, const CompactVectorOfStrings& patterns) const; - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index 25fe723ca7..8393ef04f9 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -38,7 +38,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -Result Distinct::computeResult([[maybe_unused]] bool requestLazyness) { +Result Distinct::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable{getExecutionContext()->getAllocator()}; LOG(DEBUG) << "Getting sub-result for distinct result computation..." 
<< endl; std::shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 9f9960efb9..620fea4f4b 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -55,7 +55,7 @@ class Distinct : public Operation { [[nodiscard]] string getCacheKeyImpl() const override; private: - virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; + virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 2198530d32..d69aef51c0 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -43,7 +43,7 @@ string Filter::getDescriptor() const { } // _____________________________________________________________________________ -Result Filter::computeResult([[maybe_unused]] bool requestLazyness) { +Result Filter::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; std::shared_ptr subRes = _subtree->getResult(); LOG(DEBUG) << "Filter result computation..." 
<< endl; diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 8b44f31592..1b1119491a 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -58,7 +58,7 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; template void computeFilterImpl(IdTable* outputIdTable, diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 88f6fe23af..7067b9788e 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, *dynResult = std::move(result).toDynamic(); } -Result GroupBy::computeResult([[maybe_unused]] bool requestLazyness) { +Result GroupBy::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "GroupBy result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index ee433317ff..eb417a9d2d 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -89,7 +89,7 @@ class GroupBy : public Operation { private: VariableToColumnMap computeVariableToColumnMap() const override; - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; template void processGroup(const Aggregate& expression, diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 6f674e5bd1..9c863d939d 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -254,7 +254,7 @@ size_t HasPredicateScan::getCostEstimate() { } // ___________________________________________________________________________ -Result HasPredicateScan::computeResult([[maybe_unused]] bool requestLazyness) { +Result HasPredicateScan::computeResult([[maybe_unused]] bool requestLaziness) { IdTable 
idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index 52ad881354..e1cd4e821e 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -109,7 +109,7 @@ class HasPredicateScan : public Operation { const CompactVectorOfStrings& patterns); private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 5b73e8f6b1..0aec6a3e6a 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -123,7 +123,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { return variableToColumnMap; } // _____________________________________________________________________________ -Result IndexScan::computeResult([[maybe_unused]] bool requestLazyness) { +Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "IndexScan result computation...\n"; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 398c8430a2..c8aa89bba9 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -104,7 +104,7 @@ class IndexScan : public Operation { std::array getPermutedTriple() const; private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 3082ea7a06..89863a9c51 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -90,7 +90,7 @@ string Join::getCacheKeyImpl() const { string Join::getDescriptor() const { return "Join on " + _joinVar.name(); } // 
_____________________________________________________________________________ -Result Join::computeResult([[maybe_unused]] bool requestLazyness) { +Result Join::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Getting sub-results for join result computation..." << endl; size_t leftWidth = _left->getResultWidth(); size_t rightWidth = _right->getResultWidth(); diff --git a/src/engine/Join.h b/src/engine/Join.h index aaf3f7e63b..716bb4c5c3 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -115,7 +115,7 @@ class Join : public Operation { virtual string getCacheKeyImpl() const override; private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Minus.cpp b/src/engine/Minus.cpp index 6a5cd8af23..529e685184 100644 --- a/src/engine/Minus.cpp +++ b/src/engine/Minus.cpp @@ -32,7 +32,7 @@ string Minus::getCacheKeyImpl() const { string Minus::getDescriptor() const { return "Minus"; } // _____________________________________________________________________________ -Result Minus::computeResult([[maybe_unused]] bool requestLazyness) { +Result Minus::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Minus result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/Minus.h b/src/engine/Minus.h index 378642bc6f..115f4ccba4 100644 --- a/src/engine/Minus.h +++ b/src/engine/Minus.h @@ -72,7 +72,7 @@ class Minus : public Operation { const IdTableView& a, const IdTableView& b, size_t ia, size_t ib, const vector>& matchedColumns); - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index 999e55b0c8..20e3d491ce 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -60,7 +60,7 @@ string MultiColumnJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result MultiColumnJoin::computeResult([[maybe_unused]] bool requestLazyness) { +Result MultiColumnJoin::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "MultiColumnJoin result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/MultiColumnJoin.h b/src/engine/MultiColumnJoin.h index d60079c60f..6ed454e0ba 100644 --- a/src/engine/MultiColumnJoin.h +++ b/src/engine/MultiColumnJoin.h @@ -63,7 +63,7 @@ class MultiColumnJoin : public Operation { IdTable* resultMightBeUnsorted); private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/NeutralElementOperation.h b/src/engine/NeutralElementOperation.h index e4ee67a1d9..f0b5cdb62e 100644 --- a/src/engine/NeutralElementOperation.h +++ b/src/engine/NeutralElementOperation.h @@ -40,7 +40,7 @@ class NeutralElementOperation : public Operation { }; private: - Result computeResult([[maybe_unused]] bool requestLazyness) override { + Result computeResult([[maybe_unused]] bool requestLaziness) override { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(0); idTable.resize(1); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index ed547650f9..bce4e6fc99 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -72,8 +72,8 @@ void Operation::recursivelySetTimeConstraint( // ________________________________________________________________________ std::shared_ptr Operation::getResult(bool isRoot, bool onlyReadFromCache, - bool requestLazyness) { - AD_CONTRACT_CHECK(!onlyReadFromCache || !requestLazyness); + bool requestLaziness) { + AD_CONTRACT_CHECK(!onlyReadFromCache || !requestLaziness); ad_utility::Timer timer{ad_utility::Timer::Started}; if (isRoot) { @@ -122,12 +122,12 @@ std::shared_ptr Operation::getResult(bool isRoot, updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto computeLambda = [this, &timer, requestLazyness] { + auto computeLambda = [this, &timer, requestLaziness] { checkCancellation(); runtimeInfo().status_ = 
RuntimeInformation::Status::inProgress; signalQueryUpdate(); - Result result = computeResult(requestLazyness); - AD_CONTRACT_CHECK(requestLazyness || result.isDataEvaluated()); + Result result = computeResult(requestLaziness); + AD_CONTRACT_CHECK(requestLaziness || result.isDataEvaluated()); checkCancellation(); // Compute the datatypes that occur in each column of the result. diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 3cc2f6e819..f96b4e47e4 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -148,7 +148,7 @@ class Operation { */ std::shared_ptr getResult(bool isRoot = false, bool onlyReadFromCache = false, - bool requestLazyness = false); + bool requestLaziness = false); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). As soon as one child is aborted, the whole @@ -198,8 +198,8 @@ class Operation { // testing, otherwise the `getResult()` function should be used which also // sets the runtime info and uses the cache. virtual Result computeResultOnlyForTesting( - bool requestLazyness = false) final { - return computeResult(requestLazyness); + bool requestLaziness = false) final { + return computeResult(requestLaziness); } protected: @@ -249,7 +249,7 @@ class Operation { private: //! Compute the result of the query-subtree rooted at this element.. - virtual Result computeResult(bool requestLazyness) = 0; + virtual Result computeResult(bool requestLaziness) = 0; // Create and store the complete runtime information for this operation after // it has either been succesfully computed or read from the cache. 
diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index 80d6e11973..0f042681cc 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -89,7 +89,7 @@ string OptionalJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result OptionalJoin::computeResult([[maybe_unused]] bool requestLazyness) { +Result OptionalJoin::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "OptionalJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index 63d3511fe0..37f712409a 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -75,7 +75,7 @@ class OptionalJoin : public Operation { private: void computeSizeEstimateAndMultiplicities(); - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index 984a92b9e4..da005cf352 100644 --- a/src/engine/OrderBy.cpp +++ b/src/engine/OrderBy.cpp @@ -63,7 +63,7 @@ std::string OrderBy::getDescriptor() const { } // _____________________________________________________________________________ -Result OrderBy::computeResult([[maybe_unused]] bool requestLazyness) { +Result OrderBy::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Getting sub-result for OrderBy result computation..." 
<< endl; std::shared_ptr subRes = subtree_->getResult(); diff --git a/src/engine/OrderBy.h b/src/engine/OrderBy.h index 91b15220ca..69289ae75e 100644 --- a/src/engine/OrderBy.h +++ b/src/engine/OrderBy.h @@ -78,7 +78,7 @@ class OrderBy : public Operation { } private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override { return subtree_->getVariableColumns(); diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index abec47266e..f54cf83c3c 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -51,8 +51,8 @@ class QueryExecutionTree { size_t getResultWidth() const { return rootOperation_->getResultWidth(); } - std::shared_ptr getResult(bool requestLazyness = false) const { - return rootOperation_->getResult(isRoot(), false, requestLazyness); + std::shared_ptr getResult(bool requestLaziness = false) const { + return rootOperation_->getResult(isRoot(), false, requestLaziness); } // A variable, its column index in the Id space result, and the `ResultType` diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 053c8c6ff9..71d1617a59 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -83,7 +83,7 @@ size_t Service::getCostEstimate() { } // ____________________________________________________________________________ -Result Service::computeResult([[maybe_unused]] bool requestLazyness) { +Result Service::computeResult([[maybe_unused]] bool requestLaziness) { // Get the URL of the SPARQL endpoint. 
std::string_view serviceIriString = parsedServiceClause_.serviceIri_.iri(); AD_CONTRACT_CHECK(serviceIriString.starts_with("<") && diff --git a/src/engine/Service.h b/src/engine/Service.h index 09f89ae896..8a942bf145 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -80,7 +80,7 @@ class Service : public Operation { std::string getCacheKeyImpl() const override; // Compute the result using `getTsvFunction_`. - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; // Write the given TSV result to the given result object. The `I` is the width // of the result table. diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 71012820d7..28b6be76ac 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -51,7 +51,7 @@ std::string Sort::getDescriptor() const { } // _____________________________________________________________________________ -Result Sort::computeResult([[maybe_unused]] bool requestLazyness) { +Result Sort::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Getting sub-result for Sort result computation..." 
<< endl; std::shared_ptr subRes = subtree_->getResult(); diff --git a/src/engine/Sort.h b/src/engine/Sort.h index 48ebef179f..d8a77c4f8d 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -67,7 +67,7 @@ class Sort : public Operation { } private: - virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; + virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index f693d78afe..2d8e236bc9 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -14,7 +14,7 @@ TextIndexScanForEntity::TextIndexScanForEntity( word_(std::move(word)) {} // _____________________________________________________________________________ -Result TextIndexScanForEntity::computeResult([[maybe_unused]] bool requestLazyness) { +Result TextIndexScanForEntity::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getEntityMentionsForWord( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index adf1acbcb8..4679847f74 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -101,7 +101,7 @@ class TextIndexScanForEntity : public Operation { return std::get(varOrFixed_.entity_).second; } - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 5f6e770634..42860adfb2 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -13,7 +13,7 @@ TextIndexScanForWord::TextIndexScanForWord(QueryExecutionContext* qec, 
isPrefix_(word_.ends_with('*')) {} // _____________________________________________________________________________ -Result TextIndexScanForWord::computeResult([[maybe_unused]] bool requestLazyness) { +Result TextIndexScanForWord::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index c4c1d9126c..139c7d40fe 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -50,7 +50,7 @@ class TextIndexScanForWord : public Operation { private: // Returns a Result containing an IdTable with the columns being // the text variable and the completed word (if it was prefixed) - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextLimit.cpp b/src/engine/TextLimit.cpp index 0dbb2dd835..1988763a1a 100644 --- a/src/engine/TextLimit.cpp +++ b/src/engine/TextLimit.cpp @@ -18,7 +18,7 @@ TextLimit::TextLimit(QueryExecutionContext* qec, const size_t limit, scoreColumns_(scoreColumns) {} // _____________________________________________________________________________ -Result TextLimit::computeResult([[maybe_unused]] bool requestLazyness) { +Result TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { std::shared_ptr childRes = child_->getResult(); if (limit_ == 0) { diff --git a/src/engine/TextLimit.h b/src/engine/TextLimit.h index 7d1c91bf6c..d0a60bd2d6 100644 --- a/src/engine/TextLimit.h +++ b/src/engine/TextLimit.h @@ -62,7 +62,7 @@ class TextLimit : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - Result computeResult([[maybe_unused]] bool requestLazyness) override; + Result computeResult([[maybe_unused]] bool requestLaziness) 
override; vector getChildren() override { return {child_.get()}; } }; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 38c24710ed..a4a116d9ce 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -142,7 +142,7 @@ class TransitivePathImpl : public TransitivePathBase { * * @return Result The result of the TransitivePath operation */ - Result computeResult([[maybe_unused]] bool requestLazyness) override { + Result computeResult([[maybe_unused]] bool requestLaziness) override { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && rhs_.isVariable()) { AD_THROW( diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 9571d3b060..901f1df7e3 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -158,7 +158,7 @@ size_t Union::getCostEstimate() { getSizeEstimateBeforeLimit(); } -Result Union::computeResult([[maybe_unused]] bool requestLazyness) { +Result Union::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Union result computation..." 
<< std::endl; std::shared_ptr subRes1 = _subtrees[0]->getResult(); std::shared_ptr subRes2 = _subtrees[1]->getResult(); diff --git a/src/engine/Union.h b/src/engine/Union.h index 38c3c8f114..d70b715e5a 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,7 +61,7 @@ class Union : public Operation { } private: - virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; + virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Values.cpp b/src/engine/Values.cpp index 37ce45e96e..ec6451f3eb 100644 --- a/src/engine/Values.cpp +++ b/src/engine/Values.cpp @@ -108,7 +108,7 @@ void Values::computeMultiplicities() { } // ____________________________________________________________________________ -Result Values::computeResult([[maybe_unused]] bool requestLazyness) { +Result Values::computeResult([[maybe_unused]] bool requestLaziness) { // Set basic properties of the result table. IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/Values.h b/src/engine/Values.h index ad51647b36..52823bf5ea 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -48,7 +48,7 @@ class Values : public Operation { public: // These two are also used by class `Service`, hence public. 
- virtual Result computeResult([[maybe_unused]] bool requestLazyness) override; + virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 03f12477ed..a0f7e09e30 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -49,7 +49,7 @@ class ValuesForTesting : public Operation { size_t& costEstimate() { return costEstimate_; } // ___________________________________________________________________________ - Result computeResult([[maybe_unused]] bool requestLazyness) override { + Result computeResult([[maybe_unused]] bool requestLaziness) override { auto table = table_.clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index ca500083cc..a3f7183230 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -31,7 +31,7 @@ class StallForeverOperation : public Operation { using Operation::Operation; // Do-nothing operation that runs for 100ms without computing anything, but // which can be cancelled. 
- Result computeResult([[maybe_unused]] bool requestLazyness) override { + Result computeResult([[maybe_unused]] bool requestLaziness) override { auto end = std::chrono::steady_clock::now() + 100ms; while (std::chrono::steady_clock::now() < end) { checkCancellation(); @@ -73,7 +73,7 @@ class ShallowParentOperation : public Operation { return {child_.get()}; } - Result computeResult([[maybe_unused]] bool requestLazyness) override { + Result computeResult([[maybe_unused]] bool requestLaziness) override { auto childResult = child_->getResult(); return {childResult->idTable().clone(), resultSortedOn(), childResult->getSharedLocalVocab()}; From 7576b2e6753d30195fdfc70c1259c2c7749d5d4a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 29 Apr 2024 01:45:24 +0200 Subject: [PATCH 015/133] Add comments --- src/engine/CartesianProductJoin.cpp | 3 ++- src/engine/Operation.h | 1 + src/util/ConcurrentCache.h | 9 +++++++++ src/util/ReusableGenerator.h | 11 +++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 86172967bf..2141b7bbe8 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -132,7 +132,8 @@ void CartesianProductJoin::writeResultColumn(std::span targetColumn, } } // ____________________________________________________________________________ -Result CartesianProductJoin::computeResult([[maybe_unused]] bool requestLaziness) { +Result CartesianProductJoin::computeResult( + [[maybe_unused]] bool requestLaziness) { IdTable result{getExecutionContext()->getAllocator()}; result.setNumColumns(getResultWidth()); std::vector> subResults; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index f96b4e47e4..5a30603b4a 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -249,6 +249,7 @@ class Operation { private: //! 
Compute the result of the query-subtree rooted at this element.. + // TODO turn bool into enum maybe? virtual Result computeResult(bool requestLaziness) = 0; // Create and store the complete runtime information for this operation after diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 0bbed614ad..da3d770707 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -307,6 +307,15 @@ class ConcurrentCache { private: // implementation for computeOnce (pinned and normal variant). + // TODO Accept cache extractor function (Result, computeFunction, + // isInitiator) -> Result/Value, in the case of a generator for the idtables, + // this extractor would wrap the generator inside another generator that + // catches exceptions indicating too slow consumption and calls + // computeFunction to make up for the "lost" data. On completion, if the whole + // thing fits in the cache replace with a non-generator variant. In case a + // non-lazy idtable was requested and a lazy idtable is in cache, iterate over + // it to aggregate the values. On exception (because you might not have + // ownership), invoke computeFunction and put the result into cache again. ResultAndCacheStatus computeOnceImpl(bool pinned, const Key& key, std::invocable auto computeFunction, bool onlyReadFromCache) { diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 94a9e567c6..28a8e26726 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -14,6 +14,17 @@ namespace ad_utility { +// TODO Plans for this class: Rename this class to cache-aware +// generator or something. Introduce the concept of an "owner" of a generator +// which bounds generation to a maximum storage size, throwing exceptions +// if a non-owning iterator is consuming too slow, and blocking if non-owning +// iterators are too fast. Ownership can expire if the "owning iterator" is +// destroyed. It clears cached values after itself. 
It needs to be able to hold +// a callback to be called whenever the stored size changes. Also when the +// generator is completely consumed and when the maximum cache size would be +// exceeded if no elements were deleted at the front of the cache to make sure +// this entry is evicted from the cache. + template class ReusableGenerator { using GenIterator = typename cppcoro::generator::iterator; From 7765a25207621fa4e2654f9d81aa0e9d1a0d2d2f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 29 Apr 2024 01:48:10 +0200 Subject: [PATCH 016/133] Make supportsLimit private to avoid misuse --- src/engine/Operation.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 5a30603b4a..fd3a66d5da 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -99,6 +99,10 @@ class Operation { private: virtual uint64_t getSizeEstimateBeforeLimit() = 0; + // True iff this operation directly implement a `OFFSET` and `LIMIT` clause on + // its result. + [[nodiscard]] virtual bool supportsLimit() const { return false; } + public: virtual float getMultiplicity(size_t col) = 0; virtual bool knownEmptyResult() = 0; @@ -165,10 +169,6 @@ class Operation { void recursivelySetTimeConstraint( std::chrono::steady_clock::time_point deadline); - // True iff this operation directly implement a `OFFEST` and `LIMIT` clause on - // its result. - [[nodiscard]] virtual bool supportsLimit() const { return false; } - // Set the value of the `LIMIT` clause that will be applied to the result of // this operation. 
void setLimit(const LimitOffsetClause& limitOffsetClause) { From f815be8c52649b4c71db81764ee8f49c91c3e5ee Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 1 May 2024 19:44:06 +0200 Subject: [PATCH 017/133] Properly use minimum limit if present --- src/engine/Server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index 2721337a6c..a710950aae 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -840,8 +840,9 @@ net::awaitable> Server::parseAndPlan( handle = std::move(handle), timeLimit, maxSend]() mutable -> std::optional { auto pq = SparqlParser::parseQuery(query); - if (maxSend.has_value() && !pq._limitOffset._limit.has_value()) { - pq._limitOffset._limit = maxSend.value(); + if (maxSend.has_value()) { + pq._limitOffset._limit = + std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); } handle->throwIfCancelled(); QueryPlanner qp(&qec, handle); From 90cca50cd7ce5ccfbb276d073143ad82dab071d5 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 1 May 2024 21:58:02 +0200 Subject: [PATCH 018/133] Start adding code to manipulate code after cache extraction --- src/engine/Operation.cpp | 30 ++++++++++++++++++----- src/engine/QueryExecutionTree.cpp | 11 ++++++--- src/util/ConcurrentCache.h | 40 +++++++++++++------------------ 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index bce4e6fc99..d519700ad1 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -122,11 +122,13 @@ std::shared_ptr Operation::getResult(bool isRoot, updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto computeLambda = [this, &timer, requestLaziness] { + bool actuallyComputed = false; + auto computeLambda = [this, &timer, requestLaziness, &actuallyComputed] { checkCancellation(); runtimeInfo().status_ = 
RuntimeInformation::Status::inProgress; signalQueryUpdate(); Result result = computeResult(requestLaziness); + actuallyComputed = true; AD_CONTRACT_CHECK(requestLaziness || result.isDataEvaluated()); checkCancellation(); @@ -162,13 +164,22 @@ std::shared_ptr Operation::getResult(bool isRoot, AD_CONTRACT_CHECK(result.idTable().numRows() == _limit.actualSize(result.idTable().numRows())); } + return result; + }; + + auto cacheSetup = [this, &computeLambda]() { + auto result = computeLambda(); + if (!result.isDataEvaluated()) { + // TODO register listeners that make sure cache size is + // properly updated + } return CacheValue{std::move(result), runtimeInfo()}; }; - auto result = (pinResult) ? cache.computeOncePinned(cacheKey, computeLambda, - onlyReadFromCache) - : cache.computeOnce(cacheKey, computeLambda, - onlyReadFromCache); + auto result = + pinResult + ? cache.computeOncePinned(cacheKey, cacheSetup, onlyReadFromCache) + : cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache); if (result._resultPointer == nullptr) { AD_CORRECTNESS_CHECK(onlyReadFromCache); @@ -184,7 +195,14 @@ std::shared_ptr Operation::getResult(bool isRoot, LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " << resultNumCols << std::endl; } - return result._resultPointer->resultTable(); + + if (result._resultPointer->resultTable()->isDataEvaluated() || + actuallyComputed) { + return result._resultPointer->resultTable(); + } else { + // TODO create result copy with fallback iterator here + AD_FAIL(); + } } catch (ad_utility::CancellationException& e) { e.setOperation(getDescriptor()); runtimeInfo().status_ = RuntimeInformation::Status::cancelled; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index aa09bdf05f..2f7dd762ce 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -83,7 +83,8 @@ size_t QueryExecutionTree::getCostEstimate() { // 
_____________________________________________________________________________ size_t QueryExecutionTree::getSizeEstimate() { if (!sizeEstimate_.has_value()) { - if (cachedResult_ && cachedResult_->isDataEvaluated()) { + if (cachedResult_) { + AD_CORRECTNESS_CHECK(cachedResult_->isDataEvaluated()); sizeEstimate_ = cachedResult_->idTable().size(); } else { // if we are in a unit test setting and there is no QueryExecutionContest @@ -97,7 +98,8 @@ size_t QueryExecutionTree::getSizeEstimate() { // _____________________________________________________________________________ bool QueryExecutionTree::knownEmptyResult() { - if (cachedResult_ && cachedResult_->isDataEvaluated()) { + if (cachedResult_) { + AD_CORRECTNESS_CHECK(cachedResult_->isDataEvaluated()); return cachedResult_->idTable().size() == 0; } return rootOperation_->knownEmptyResult(); @@ -117,7 +119,10 @@ void QueryExecutionTree::readFromCache() { auto& cache = qec_->getQueryTreeCache(); auto res = cache.getIfContained(getCacheKey()); if (res.has_value()) { - cachedResult_ = res->_resultPointer->resultTable(); + auto resultTable = res->_resultPointer->resultTable(); + if (resultTable->isDataEvaluated()) { + cachedResult_ = std::move(resultTable); + } } } diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index da3d770707..e6266e2c72 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -18,7 +18,6 @@ namespace ad_utility { -using std::make_shared; using std::shared_ptr; /** This exception is thrown if we are waiting for a computation result, @@ -38,6 +37,8 @@ class WaitedForResultWhichThenFailedException : public std::exception { enum struct CacheStatus { cachedNotPinned, cachedPinned, + // TODO Rename to notCached, the name is just confusing. Can + // potentially be merged with notInCacheAndNotComputed. computed, notInCacheAndNotComputed }; @@ -180,20 +181,20 @@ class ConcurrentCache { * @return A shared_ptr to the computation result. 
* */ - ResultAndCacheStatus computeOnce(const Key& key, - std::invocable auto computeFunction, - bool onlyReadFromCache = false) { - return computeOnceImpl(false, key, std::move(computeFunction), - onlyReadFromCache); + ResultAndCacheStatus computeOnce( + const Key& key, + const InvocableWithConvertibleReturnType auto& computeFunction, + bool onlyReadFromCache = false) { + return computeOnceImpl(false, key, computeFunction, onlyReadFromCache); } /// Similar to computeOnce, with the following addition: After the call /// completes, the result will be pinned in the underlying cache. - ResultAndCacheStatus computeOncePinned(const Key& key, - std::invocable auto computeFunction, - bool onlyReadFromCache = false) { - return computeOnceImpl(true, key, std::move(computeFunction), - onlyReadFromCache); + ResultAndCacheStatus computeOncePinned( + const Key& key, + const InvocableWithConvertibleReturnType auto& computeFunction, + bool onlyReadFromCache = false) { + return computeOnceImpl(true, key, computeFunction, onlyReadFromCache); } /// Clear the cache (but not the pinned entries) @@ -307,18 +308,11 @@ class ConcurrentCache { private: // implementation for computeOnce (pinned and normal variant). - // TODO Accept cache extractor function (Result, computeFunction, - // isInitiator) -> Result/Value, in the case of a generator for the idtables, - // this extractor would wrap the generator inside another generator that - // catches exceptions indicating too slow consumption and calls - // computeFunction to make up for the "lost" data. On completion, if the whole - // thing fits in the cache replace with a non-generator variant. In case a - // non-lazy idtable was requested and a lazy idtable is in cache, iterate over - // it to aggregate the values. On exception (because you might not have - // ownership), invoke computeFunction and put the result into cache again. 
- ResultAndCacheStatus computeOnceImpl(bool pinned, const Key& key, - std::invocable auto computeFunction, - bool onlyReadFromCache) { + ResultAndCacheStatus computeOnceImpl( + bool pinned, const Key& key, + const InvocableWithConvertibleReturnType auto& computeFunction, + bool onlyReadFromCache) { + using std::make_shared; bool mustCompute; shared_ptr resultInProgress; // first determine whether we have to compute the result, From 694c21f8b435f0aca6496f90b8fc6f61236dcd26 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 5 May 2024 20:41:22 +0200 Subject: [PATCH 019/133] Implement fallback mechanism for failed cache share --- src/engine/Operation.cpp | 9 ++-- src/engine/Result.cpp | 57 ++++++++++++++++++++-- src/engine/Result.h | 15 ++++-- src/util/ConcurrentCache.h | 2 - src/util/ReusableGenerator.h | 91 +++++++++++++++++++++++++++++++----- 5 files changed, 148 insertions(+), 26 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index d519700ad1..856f37565e 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -172,6 +172,8 @@ std::shared_ptr Operation::getResult(bool isRoot, if (!result.isDataEvaluated()) { // TODO register listeners that make sure cache size is // properly updated + // TODO serialize into single IdTable if partially computed + // all chunks fit into memory and generator is exhausted. 
} return CacheValue{std::move(result), runtimeInfo()}; }; @@ -199,10 +201,9 @@ std::shared_ptr Operation::getResult(bool isRoot, if (result._resultPointer->resultTable()->isDataEvaluated() || actuallyComputed) { return result._resultPointer->resultTable(); - } else { - // TODO create result copy with fallback iterator here - AD_FAIL(); } + return std::make_shared(Result::createResultWithFallback( + result._resultPointer->resultTable(), std::move(computeLambda))); } catch (ad_utility::CancellationException& e) { e.setOperation(getDescriptor()); runtimeInfo().status_ = RuntimeInformation::Status::cancelled; @@ -252,7 +253,7 @@ void Operation::updateRuntimeInformationOnSuccess( const Result& resultTable, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) { _runtimeInfo->totalTime_ = duration; - // TODO replace 0 size with estimation or something + // TODO find a better representation for "unknown" than 0. _runtimeInfo->numRows_ = resultTable.isDataEvaluated() ? resultTable.idTable().size() : 0; _runtimeInfo->cacheStatus_ = cacheStatus; diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index b738194505..ee91d7ccfe 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -91,8 +91,9 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // `O(1)` (see the docs for `std::shift_left`). 
// TODO make limit its own dedicated operation to avoid this // modification here - AD_CONTRACT_CHECK(isDataEvaluated()); - using Gen = GeneratorType; + AD_CONTRACT_CHECK( + !std::holds_alternative>(_idTable)); + using Gen = ad_utility::ReusableGenerator; if (std::holds_alternative(_idTable)) { modifyIdTable(std::get(_idTable), limitOffset); } else if (std::holds_alternative(_idTable)) { @@ -163,9 +164,20 @@ const IdTable& Result::idTable() const { } // _____________________________________________________________________________ -const Result::GeneratorType& Result::idTables() const { +cppcoro::generator Result::idTables() const { AD_CONTRACT_CHECK(!isDataEvaluated()); - return std::get(_idTable); + return std::visit( + [](auto& generator) -> cppcoro::generator { + if constexpr (!std::is_same_v) { + for (auto&& idTable : generator) { + co_yield idTable; + } + } else { + // Type of variant here should never be `IdTable` + AD_FAIL(); + } + }, + _idTable); } // _____________________________________________________________________________ @@ -182,3 +194,40 @@ void Result::logResultSize() const { LOG(INFO) << "Result has unknown size (not computed yet)" << std::endl; } } + +// _____________________________________________________________________________ +Result Result::createResultWithFallback(std::shared_ptr original, + std::function fallback) { + AD_CONTRACT_CHECK(!original->isDataEvaluated()); + auto generator = [](std::shared_ptr sharedResult, + std::function fallback) + -> cppcoro::generator { + size_t index = 0; + try { + for (auto&& idTable : sharedResult->idTables()) { + co_yield idTable; + index++; + } + co_return; + } catch (const ad_utility::IteratorExpired&) { + // co_yield is not allowed here, so simply ignore this and allow control + // flow to take over + } catch (...) { + throw; + } + Result freshResult = fallback(); + // If data is evaluated this means that this process is not deterministic + // or that there's a wrong callback used here. 
+ AD_CORRECTNESS_CHECK(!freshResult.isDataEvaluated()); + for (auto&& idTable : freshResult.idTables()) { + if (index > 0) { + index--; + continue; + } + co_yield idTable; + } + }; + return Result{generator(std::move(original), std::move(fallback)), + original->_sortedBy, + SharedLocalVocabWrapper{original->localVocab_}}; +} diff --git a/src/engine/Result.h b/src/engine/Result.h index 398273b9ec..e1ca310f83 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -22,10 +22,12 @@ // is always a table and contained in the member `idTable()`. class Result { private: - // The actual entries. - using GeneratorType = ad_utility::ReusableGenerator; - using TableType = std::variant; - TableType _idTable; + using TableType = + std::variant, + cppcoro::generator>; + // The actual entries. Since generators need to be modified + // in order to be consumed, this needs to be mutable. + mutable TableType _idTable; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. @@ -102,7 +104,7 @@ class Result { const IdTable& idTable() const; // Access to the underlying `IdTable`s. - const GeneratorType& idTables() const; + cppcoro::generator idTables() const; // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { return _sortedBy; } @@ -174,4 +176,7 @@ class Result { // undefined values in the `_idTable` of this result. Return `true` iff the // check is succesful. 
bool checkDefinedness(const VariableToColumnMap& varColMap); + + static Result createResultWithFallback(std::shared_ptr original, + std::function fallback); }; diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index e6266e2c72..bb195e56be 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -331,8 +331,6 @@ class ConcurrentCache { } else if (onlyReadFromCache) { return {nullptr, CacheStatus::notInCacheAndNotComputed}; } else if (lockPtr->_inProgress.contains(key)) { - // TODO serialize into single IdTable if partially computed - // the result is not cached, but someone else is computing it. // it is important, that we do not immediately call getResult() since // this call blocks and we currently hold a lock. diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 28a8e26726..01d23ba3bc 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -11,9 +11,12 @@ #include "util/Exception.h" #include "util/Generator.h" #include "util/Synchronized.h" +#include "util/UniqueCleanup.h" namespace ad_utility { +class IteratorExpired : std::exception {}; + // TODO Plans for this class: Rename this class to cache-aware // generator or something. 
Introduce the concept of an "owner" of a generator // which bounds generation to a maximum storage size, throwing exceptions @@ -35,7 +38,10 @@ class ReusableGenerator { friend ReusableGenerator; cppcoro::generator generator_; std::optional generatorIterator_{}; - std::vector cachedValues_{}; + std::vector> cachedValues_{}; + bool masterExists_ = true; + std::function onSizeChanged_{}; + std::function onGeneratorFinished_{}; explicit ComputationStorage(cppcoro::generator generator) : generator_{std::move(generator)} {} @@ -47,11 +53,21 @@ class ReusableGenerator { ComputationStorage& operator=(const ComputationStorage& other) = delete; private: - void advanceTo(size_t index) { + void advanceTo(size_t index, bool isMaster) { AD_CONTRACT_CHECK(index <= cachedValues_.size()); if (index != cachedValues_.size()) { + if (!cachedValues_.at(index).has_value()) { + throw IteratorExpired{}; + } return; } + if (masterExists_) { + if (isMaster) { + // TODO wake up condition variable + } else { + // TODO wait for condition variable + } + } if (generatorIterator_.has_value()) { AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); ++generatorIterator_.value(); @@ -60,17 +76,57 @@ class ReusableGenerator { } if (generatorIterator_.value() != generator_.end()) { cachedValues_.emplace_back(std::move(*generatorIterator_.value())); + // False on onSizeChange means the value got too big. 
+ if (onSizeChanged_ && !onSizeChanged_()) { + for (size_t i = 0; i < cachedValues_.size() - 1; i++) { + if (cachedValues_.at(i).has_value()) { + cachedValues_.at(i).reset(); + if (onSizeChanged_()) { + break; + } + } + } + } + } else if (onGeneratorFinished_) { + onGeneratorFinished_(cachedValues_.empty() || + cachedValues_.at(0).has_value()); } } Reference getCachedValue(size_t index) const { - return cachedValues_.at(index); + if (!cachedValues_.at(index).has_value()) { + throw IteratorExpired{}; + } + return cachedValues_.at(index).value(); } bool isDone(size_t index) noexcept { return index == cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } + + void clearMaster() { + AD_CORRECTNESS_CHECK(masterExists_); + masterExists_ = false; + // TODO wake up condition variable + } + + void setOnSizeChanged(std::function onSizeChanged) { + onSizeChanged_ = std::move(onSizeChanged); + } + + void setOnGeneratorFinished(std::function onGeneratorFinished) { + onGeneratorFinished_ = std::move(onGeneratorFinished); + } + + void forEachCachedValue( + const std::invocable auto& function) const { + for (const auto& optional : cachedValues_) { + if (optional.has_value()) { + function(optional.value()); + } + } + } }; std::shared_ptr> computationStorage_; @@ -88,16 +144,26 @@ class ReusableGenerator { class Iterator { size_t currentIndex_ = 0; - std::weak_ptr> storage_; + unique_cleanup::UniqueCleanup< + std::weak_ptr>> + storage_; + bool isMaster_; public: - explicit Iterator(std::weak_ptr> storage) - : storage_{storage} { - storage_.lock()->wlock()->advanceTo(currentIndex_); + explicit Iterator(std::weak_ptr> storage, + bool isMaster) + : storage_{storage, + [isMaster](auto&& storage) { + if (isMaster) { + storage.lock()->wlock()->clearMaster(); + } + }}, + isMaster_{isMaster} { + storage_->lock()->wlock()->advanceTo(currentIndex_, isMaster); } friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return 
!it.storage_.lock()->wlock()->isDone(it.currentIndex_); + return !it.storage_->lock()->wlock()->isDone(it.currentIndex_); } friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { @@ -111,9 +177,10 @@ class ReusableGenerator { friend bool operator!=(IteratorSentinel s, const Iterator& it) noexcept { return it != s; } + Iterator& operator++() { ++currentIndex_; - storage_.lock()->wlock()->advanceTo(currentIndex_); + storage_->lock()->wlock()->advanceTo(currentIndex_, isMaster_); return *this; } @@ -121,13 +188,15 @@ class ReusableGenerator { void operator++(int) { (void)operator++(); } Reference operator*() const noexcept { - return storage_.lock()->rlock()->getCachedValue(currentIndex_); + return storage_->lock()->rlock()->getCachedValue(currentIndex_); } Pointer operator->() const noexcept { return std::addressof(operator*()); } }; - Iterator begin() const noexcept { return Iterator{computationStorage_}; } + Iterator begin(bool isMaster = false) const noexcept { + return Iterator{computationStorage_, isMaster}; + } IteratorSentinel end() const noexcept { return IteratorSentinel{}; } From ea8b81f27027cf1e6c3e89c14fbb13c56273286b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 5 May 2024 20:45:17 +0200 Subject: [PATCH 020/133] Fix accidental edit of Usage.md --- benchmark/Usage.md | 111 ++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 73 deletions(-) diff --git a/benchmark/Usage.md b/benchmark/Usage.md index 6846e47d03..5e621b776c 100644 --- a/benchmark/Usage.md +++ b/benchmark/Usage.md @@ -5,35 +5,27 @@ A quick introduction and tutorial for the macro benchmark infrastructure. As of July 2023 the benchmark infrastructure has the following features: - Measuring the execution time of a function in seconds. -- Organizing a benchmark as a single measurement, as a table of measurements, or as a group of single measurements and - tables. 
+- Organizing a benchmark as a single measurement, as a table of measurements, or as a group of single measurements and tables. - Printing the measured benchmarks and/or exporting them as a JSON formatted file. - Adding metadata information to benchmarks. -- Passing values at runtime via pre-defined configuration options, which can be set either using a JSON file, or per - shorthand in the CLI. +- Passing values at runtime via pre-defined configuration options, which can be set either using a JSON file, or per shorthand in the CLI. -However, support for the prevention of compiler optimization in benchmarks is not available and still in the planning -stage. This can sabotage measured execution times and should be kept in mind, while writing benchmarks. -For example: An expression without a return type and without side effects will get optimized out. Like, for example, -when you are trying to measure a `getter` function without using the returned value. +However, support for the prevention of compiler optimization in benchmarks is not available and still in the planning stage. This can sabotage measured execution times and should be kept in mind, while writing benchmarks. +For example: An expression without a return type and without side effects will get optimized out. Like, for example, when you are trying to measure a `getter` function without using the returned value. # How to write a basic benchmark -This will be a rather undetailed tutorial, because all the functions and classes have their own documentation, which I -do not want to repeat. +This will be a rather undetailed tutorial, because all the functions and classes have their own documentation, which I do not want to repeat. For a quick hands-on example of the general usage and all features, see `benchmark/BenchmarkExamples.cpp`. -Larger collections of benchmarks, or even a single one, are organized into classes, that inherit -from `BenchmarkInterface` in `benchmark/infrastructure/Benchmark.h`. 
+Larger collections of benchmarks, or even a single one, are organized into classes, that inherit from `BenchmarkInterface` in `benchmark/infrastructure/Benchmark.h`. Those class implementations should have their own `.cpp` file in the folder `benchmark`. ## Writing the class -To write your own class, first include `benchmark/infrastructure/Benchmark.h` in you file. It includes all needed -classes, interfaces and types. -Secondly, you should write your class inside the `ad_benchmark` namespace, where all benchmark infrastructure can be -found. +To write your own class, first include `benchmark/infrastructure/Benchmark.h` in you file. It includes all needed classes, interfaces and types. +Secondly, you should write your class inside the `ad_benchmark` namespace, where all benchmark infrastructure can be found. Now, the interface for benchmark classes has 5 functions: @@ -41,18 +33,15 @@ Now, the interface for benchmark classes has 5 functions: - `getGeneralMetadata` - `runAllBenchmarks` - `getConfigManager` -- `updateDefaultGeneralMetadata` +- `updateDefaultGeneralMetadata` `name` should just return the name of your benchmark class, so that you can easily identify it later. -`getGeneralMetadata` and `getConfigManager`are getters for member variables, that are used for advanced features. So -they can be safely ignored for the time being. +`getGeneralMetadata` and `getConfigManager`are getters for member variables, that are used for advanced features. So they can be safely ignored for the time being. `updateDefaultGeneralMetadata` exists solely for the infrastructure and should be ignored. -`runAllBenchmarks` is where you actually measure your functions using the classes of `BenchmarkMeasurementContainer.h`, -which should be created using `BenchmarkResults`, who will save them and later pass them on for processing by the -infrastructure. 
+`runAllBenchmarks` is where you actually measure your functions using the classes of `BenchmarkMeasurementContainer.h`, which should be created using `BenchmarkResults`, who will save them and later pass them on for processing by the infrastructure. Which could look like this: ```c++ @@ -110,8 +99,7 @@ BenchmarkResults runAllBenchmarks(){ } ``` -After writing your class, you will have to register it. For that, simply call the macro `AD_REGISTER_BENCHMARK` with -your class name and all needed arguments for construction inside the `ad_benchmark` namespace. +After writing your class, you will have to register it. For that, simply call the macro `AD_REGISTER_BENCHMARK` with your class name and all needed arguments for construction inside the `ad_benchmark` namespace. For example: ```c++ @@ -121,30 +109,24 @@ AD_REGISTER_BENCHMARK(MyClass, ConstructorArgument1, ConstructorArgument2, ...); ## CMake Registering your finished benchmark class with CMake is rather easy. -Simply add the line `addAndLinkBenchmark(MyBenchmarkClassFile)`, without the ending `.cpp`, to the -file `benchmark/CMakeLists.txt`. +Simply add the line `addAndLinkBenchmark(MyBenchmarkClassFile)`, without the ending `.cpp`, to the file `benchmark/CMakeLists.txt`. It will now be compiled. The compiled version can be found inside the `benchmark` folder inside your build directory. # Using advanced benchmark features ## Metadata -Setting metadata is handled by the `BenchmarkMetadata` class. The set metadata information will be included in the -printed output of a compiled benchmark file and in the JSON file export. +Setting metadata is handled by the `BenchmarkMetadata` class. The set metadata information will be included in the printed output of a compiled benchmark file and in the JSON file export. You can find instances of `BenchmarkMetadata` for your usage at 4 locations: -- At `metadata()` of created `ResultEntry` objects, in order to give metadata information about the benchmark - measurement. 
+- At `metadata()` of created `ResultEntry` objects, in order to give metadata information about the benchmark measurement. - At `metadata()` of created `ResultGroup` objects, in order to give metadata information about the group. -- At `metadata()` of created `Result` objects, in order to give metadata information about the table. +- At `metadata()` of created `ResultTable` objects, in order to give metadata information about the table. -- In your own class, under the getter `getGeneralMetadata()`. The returned member variable exists in order to give more - general metadata information about your benchmark class. This is mostly, so that you don't have to constantly repeat - metadata information, that are true for all the things you are measuring, in other places. For example, this would be - a good place to give the name of an algorithm, if your whole benchmark class is about measuring the runtimes of one. +- In your own class, under the getter `getGeneralMetadata()`. The returned member variable exists in order to give more general metadata information about your benchmark class. This is mostly, so that you don't have to constantly repeat metadata information, that are true for all the things you are measuring, in other places. For example, this would be a good place to give the name of an algorithm, if your whole benchmark class is about measuring the runtimes of one. ## Runtime configuration @@ -153,9 +135,7 @@ Defining the configuration options and passing values to them. ### Adding options -Adding configuration options is done by adding configuration option to the private member variable `manager_`, -accessible via a getter, by using the function `ConfigManager::addOption`. That is best done in the constructor of your -class. +Adding configuration options is done by adding configuration option to the private member variable `manager_`, accessible via a getter, by using the function `ConfigManager::addOption`. 
That is best done in the constructor of your class. In our system a configuration option is described by a handful of characteristics: @@ -166,23 +146,21 @@ In our system a configuration option is described by a handful of characteristic 3. If it has a default value. If it hasn't, people will always have to provide their own value at run time. 4. What **type** of values it takes. The following types are available: + + - `bool`. + - `std::string`. + - `int`. + - `size_t` + - `float`. + - A `std::vector` of the previous options. - - `bool`. - - `std::string`. - - `int`. - - `size_t` - - `float`. - - A `std::vector` of the previous options. +However, unlike the default value, the value it takes, isn't saved internally. -However, unlike the default value, the value it takes, isn't saved internally. - -Instead, it takes a pointer to a variable of the type, that it itself takes, at construction. Whenever `ConfigOption` -gets set to a value, the variable, for which the pointer was passed, is set to that value. +Instead, it takes a pointer to a variable of the type, that it itself takes, at construction. Whenever `ConfigOption` gets set to a value, the variable, for which the pointer was passed, is set to that value. Note, that also happens at the time of creation, if a default value was given. -In order to organize `ConfigOption`s easier, `ConfigManager` uses JSON like paths, but made up entirely of strings, for -identification. Those are defined at the time of creation of the option and can't be changed later. +In order to organize `ConfigOption`s easier, `ConfigManager` uses JSON like paths, but made up entirely of strings, for identification. Those are defined at the time of creation of the option and can't be changed later. ### Passing values @@ -190,36 +168,23 @@ Setting the values of the configuration options at runtime can be done in two wa 1. Writing a JSON file and passing the file location via CLI. -2. 
Using the shorthand described in `src/util/ConfigManager/generated/ConfigShorthand.g4`, by writing it directly as an - argument via CLI. Note: The shorthand will overwrite the value of any configuration option, if both ways try to set - it. +2. Using the shorthand described in `src/util/ConfigManager/generated/ConfigShorthand.g4`, by writing it directly as an argument via CLI. Note: The shorthand will overwrite the value of any configuration option, if both ways try to set it. -The shorthand is basically just normal JSON, but adjusted for easier usage. There are 3 big changes. +The shorthand is basically just normal JSON, but adjusted for easier usage. There are 3 big changes. -First, there are no line breaks allowed. The shorthand is build for usage directly in the CLI, so that is an unneeded -feature +First, there are no line breaks allowed. The shorthand is build for usage directly in the CLI, so that is an unneeded feature -Second, because a configuration is always represented by a JSON object, a shorthand string is always treated, as if it -had `{}` braces at the beginning and end. +Second, because a configuration is always represented by a JSON object, a shorthand string is always treated, as if it had `{}` braces at the beginning and end. -Third, the keys of key-value pairs, for example `"key" : value`, don't need to be surrounded with `"`. `"` is a special -symbol in the CLI, and we want to save you the extra work of always typing `\"key\"`. +Third, the keys of key-value pairs, for example `"key" : value`, don't need to be surrounded with `"`. `"` is a special symbol in the CLI, and we want to save you the extra work of always typing `\"key\"`. -Using those two ways of passing information, the configuration options held by an internally created `ConfigManager` -object, will be set. +Using those two ways of passing information, the configuration options held by an internally created `ConfigManager` object, will be set. 
-In both of those, you have to write out the complete path to your configuration option and write the value, you wish to -set it to, at the end. -For example: Let's say, you defined a configuration option `someNumber` and added it with the -path `tableSizes/someNumber`. Then, if you wanted to set it to `20` using JSON, you would have to write: +In both of those, you have to write out the complete path to your configuration option and write the value, you wish to set it to, at the end. +For example: Let's say, you defined a configuration option `someNumber` and added it with the path `tableSizes/someNumber`. Then, if you wanted to set it to `20` using JSON, you would have to write: ```json -{ - "tableSizes": "some-number" - : - 20 -} +{"tableSizes": "some-number": 20} ``` -However, **if** the passed values can't be interpreted as the correct types for the configuration options, an exception -will be thrown. +However, **if** the passed values can't be interpreted as the correct types for the configuration options, an exception will be thrown. 
From 50e45298a81e96b4087a1824ff9e06999d49d67b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 6 May 2024 00:56:31 +0200 Subject: [PATCH 021/133] Consume result as master --- src/engine/Operation.cpp | 7 +++++-- src/engine/Result.cpp | 16 ++++++++++++++++ src/engine/Result.h | 2 ++ src/util/IteratorWrapper.h | 37 ++++++++++++++++++++++++++++++++++++ src/util/ReusableGenerator.h | 2 ++ 5 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/util/IteratorWrapper.h diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 856f37565e..15f3bb2b71 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -198,9 +198,12 @@ std::shared_ptr Operation::getResult(bool isRoot, << resultNumCols << std::endl; } - if (result._resultPointer->resultTable()->isDataEvaluated() || - actuallyComputed) { + if (result._resultPointer->resultTable()->isDataEvaluated()) { return result._resultPointer->resultTable(); + } else if (actuallyComputed) { + return std::make_shared( + Result::createResultAsMasterConsumer( + result._resultPointer->resultTable())); } return std::make_shared(Result::createResultWithFallback( result._resultPointer->resultTable(), std::move(computeLambda))); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index ee91d7ccfe..818dd6a423 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -8,6 +8,7 @@ #include "engine/LocalVocab.h" #include "util/Exception.h" +#include "util/IteratorWrapper.h" #include "util/Log.h" // _____________________________________________________________________________ @@ -231,3 +232,18 @@ Result Result::createResultWithFallback(std::shared_ptr original, original->_sortedBy, SharedLocalVocabWrapper{original->localVocab_}}; } + +Result Result::createResultAsMasterConsumer( + std::shared_ptr original) { + using Gen = ad_utility::ReusableGenerator; + AD_CONTRACT_CHECK(std::holds_alternative(original->_idTable)); + auto generator = [](auto 
original) -> cppcoro::generator { + using ad_utility::IteratorWrapper; + auto& generator = std::get(original->_idTable); + for (const IdTable& idTable : IteratorWrapper{generator, true}) { + co_yield idTable; + } + }; + return Result{generator(std::move(original)), original->_sortedBy, + SharedLocalVocabWrapper{original->localVocab_}}; +} diff --git a/src/engine/Result.h b/src/engine/Result.h index e1ca310f83..b90b3d1a16 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -179,4 +179,6 @@ class Result { static Result createResultWithFallback(std::shared_ptr original, std::function fallback); + + static Result createResultAsMasterConsumer(std::shared_ptr original); }; diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h new file mode 100644 index 0000000000..6a9e4fc9ea --- /dev/null +++ b/src/util/IteratorWrapper.h @@ -0,0 +1,37 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#ifndef ITERATORWRAPPER_H +#define ITERATORWRAPPER_H + +#include + +namespace ad_utility { + +template +class IteratorWrapper { + OriginalIterator& iterator_; + std::tuple args_; + + public: + explicit IteratorWrapper(OriginalIterator& iterator, Args... args) + : iterator_{iterator}, args_{std::move(args)...} {} + + auto begin() { + return std::apply([this](auto... 
args) { return iterator_.begin(args...); }, + args_); + } + + auto end() { return iterator_.end(); } + + auto& operator++() { return iterator_++; } + + auto& operator*() { return *iterator_; } + + auto operator->() { return std::addressof(operator*()); } +}; + +}; // namespace ad_utility + +#endif // ITERATORWRAPPER_H diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 01d23ba3bc..282f55b920 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -39,6 +39,8 @@ class ReusableGenerator { cppcoro::generator generator_; std::optional generatorIterator_{}; std::vector> cachedValues_{}; + // TODO make sure we error out when a non-master iterator is + // consumed before the initial master iterator bool masterExists_ = true; std::function onSizeChanged_{}; std::function onGeneratorFinished_{}; From 16eedd8cc5ec20f473272413780f077cc7d3b2c6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 10 May 2024 20:12:27 +0200 Subject: [PATCH 022/133] Add proper condition variables --- src/engine/Result.h | 1 + src/util/ReusableGenerator.h | 102 +++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/src/engine/Result.h b/src/engine/Result.h index b90b3d1a16..38904aa91f 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -88,6 +88,7 @@ class Result { SharedLocalVocabWrapper localVocab); Result(TableType idTable, std::vector sortedBy, LocalVocab&& localVocab); + // TODO add better overloads than using TableType // Prevent accidental copying of a result table. 
Result(const Result& other) = delete; diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 282f55b920..9919cef969 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -6,6 +6,7 @@ #define REUSABLEGENERATOR_H #include +#include #include #include "util/Exception.h" @@ -17,16 +18,7 @@ namespace ad_utility { class IteratorExpired : std::exception {}; -// TODO Plans for this class: Rename this class to cache-aware -// generator or something. Introduce the concept of an "owner" of a generator -// which bounds generation to a maximum storage size, throwing exceptions -// if a non-owning iterator is consuming too slow, and blocking if non-owning -// iterators are too fast. Ownership can expire if the "owning iterator" is -// destroyed. It clears cached values after itself. It needs to be able to hold -// a callback to be called whenever the stored size changes. Also when the -// generator is completely consumed and when the maximum cache size would be -// exceeded if no elements were deleted at the front of the cache to make sure -// this entry is evicted from the cache. +// TODO Rename this class to cache-aware generator or something. 
template class ReusableGenerator { @@ -34,40 +26,53 @@ class ReusableGenerator { using Reference = const T&; using Pointer = const T*; + enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; + class ComputationStorage { friend ReusableGenerator; + mutable std::shared_mutex mutex_; + std::condition_variable_any conditionVariable_; cppcoro::generator generator_; std::optional generatorIterator_{}; std::vector> cachedValues_{}; - // TODO make sure we error out when a non-master iterator is - // consumed before the initial master iterator - bool masterExists_ = true; + MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; std::function onSizeChanged_{}; std::function onGeneratorFinished_{}; + public: explicit ComputationStorage(cppcoro::generator generator) : generator_{std::move(generator)} {} - - public: - ComputationStorage(ComputationStorage&& other) = default; + ComputationStorage(ComputationStorage&& other) = delete; ComputationStorage(const ComputationStorage& other) = delete; - ComputationStorage& operator=(ComputationStorage&& other) = default; + ComputationStorage& operator=(ComputationStorage&& other) = delete; ComputationStorage& operator=(const ComputationStorage& other) = delete; private: void advanceTo(size_t index, bool isMaster) { + std::unique_lock lock{mutex_}; AD_CONTRACT_CHECK(index <= cachedValues_.size()); - if (index != cachedValues_.size()) { + // Make sure master iterator does exist and we're not blocking + // indefinitely + if (isMaster) { + AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::MASTER_DONE); + masterState_ = MasterIteratorState::MASTER_STARTED; + } else { + AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::NOT_STARTED); + } + if (index < cachedValues_.size()) { if (!cachedValues_.at(index).has_value()) { throw IteratorExpired{}; } return; } - if (masterExists_) { - if (isMaster) { - // TODO wake up condition variable - } else { - // TODO wait for condition variable + if 
(masterState_ == MasterIteratorState::MASTER_STARTED) { + if (!isMaster) { + conditionVariable_.wait(lock, [this, index]() { + return (generatorIterator_.has_value() && + generatorIterator_.value() == generator_.end()) || + index < cachedValues_.size(); + }); + return; } } if (generatorIterator_.has_value()) { @@ -93,9 +98,13 @@ class ReusableGenerator { onGeneratorFinished_(cachedValues_.empty() || cachedValues_.at(0).has_value()); } + if (isMaster) { + conditionVariable_.notify_all(); + } } Reference getCachedValue(size_t index) const { + std::shared_lock lock{mutex_}; if (!cachedValues_.at(index).has_value()) { throw IteratorExpired{}; } @@ -103,26 +112,32 @@ class ReusableGenerator { } bool isDone(size_t index) noexcept { + std::shared_lock lock{mutex_}; return index == cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } void clearMaster() { - AD_CORRECTNESS_CHECK(masterExists_); - masterExists_ = false; - // TODO wake up condition variable + std::unique_lock lock{mutex_}; + AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::MASTER_DONE); + masterState_ = MasterIteratorState::MASTER_DONE; + lock.unlock(); + conditionVariable_.notify_all(); } void setOnSizeChanged(std::function onSizeChanged) { + std::lock_guard lock{mutex_}; onSizeChanged_ = std::move(onSizeChanged); } void setOnGeneratorFinished(std::function onGeneratorFinished) { + std::lock_guard lock{mutex_}; onGeneratorFinished_ = std::move(onGeneratorFinished); } void forEachCachedValue( const std::invocable auto& function) const { + std::shared_lock lock{mutex_}; for (const auto& optional : cachedValues_) { if (optional.has_value()) { function(optional.value()); @@ -130,12 +145,12 @@ class ReusableGenerator { } } }; - std::shared_ptr> computationStorage_; + std::shared_ptr computationStorage_; public: explicit ReusableGenerator(cppcoro::generator generator) - : computationStorage_{std::make_shared>( - 
ComputationStorage{std::move(generator)})} {} + : computationStorage_{ + std::make_shared(std::move(generator))} {} ReusableGenerator(ReusableGenerator&& other) = default; ReusableGenerator(const ReusableGenerator& other) = delete; @@ -146,26 +161,31 @@ class ReusableGenerator { class Iterator { size_t currentIndex_ = 0; - unique_cleanup::UniqueCleanup< - std::weak_ptr>> - storage_; + unique_cleanup::UniqueCleanup> storage_; bool isMaster_; + auto storage() const { + auto pointer = storage_->lock(); + AD_CORRECTNESS_CHECK(pointer); + return pointer; + } + public: - explicit Iterator(std::weak_ptr> storage, - bool isMaster) + explicit Iterator(std::weak_ptr storage, bool isMaster) : storage_{storage, [isMaster](auto&& storage) { if (isMaster) { - storage.lock()->wlock()->clearMaster(); + auto pointer = storage.lock(); + AD_CORRECTNESS_CHECK(pointer); + pointer->clearMaster(); } }}, isMaster_{isMaster} { - storage_->lock()->wlock()->advanceTo(currentIndex_, isMaster); + this->storage()->advanceTo(currentIndex_, isMaster); } friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return !it.storage_->lock()->wlock()->isDone(it.currentIndex_); + return !it.storage_->lock()->isDone(it.currentIndex_); } friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { @@ -182,7 +202,7 @@ class ReusableGenerator { Iterator& operator++() { ++currentIndex_; - storage_->lock()->wlock()->advanceTo(currentIndex_, isMaster_); + storage()->advanceTo(currentIndex_, isMaster_); return *this; } @@ -190,7 +210,7 @@ class ReusableGenerator { void operator++(int) { (void)operator++(); } Reference operator*() const noexcept { - return storage_->lock()->rlock()->getCachedValue(currentIndex_); + return storage()->getCachedValue(currentIndex_); } Pointer operator->() const noexcept { return std::addressof(operator*()); } @@ -203,8 +223,8 @@ class ReusableGenerator { IteratorSentinel end() const noexcept { return IteratorSentinel{}; } cppcoro::generator 
extractGenerator() && { - auto lock = computationStorage_->wlock(); - cppcoro::generator result{std::move(lock->generator_)}; + std::unique_lock lock{computationStorage_->mutex_}; + cppcoro::generator result{std::move(computationStorage_->generator_)}; computationStorage_.reset(); return result; } From bf8f085320a6bc0570c27b73c26ef40399798fc5 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 10 May 2024 22:49:59 +0200 Subject: [PATCH 023/133] Implement code that allows for proper recomputation of cache size --- src/engine/Operation.cpp | 13 ++++-- src/engine/QueryExecutionContext.h | 13 +++--- src/engine/Result.cpp | 31 ++++++++++++++ src/engine/Result.h | 8 +++- src/util/Cache.h | 68 +++++++++++++++++++++++++++--- src/util/ConcurrentCache.h | 5 +++ src/util/ReusableGenerator.h | 43 ++++++++++++++----- src/util/UniqueCleanup.h | 1 - 8 files changed, 151 insertions(+), 31 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 15f3bb2b71..d6ac130593 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -167,11 +167,18 @@ std::shared_ptr Operation::getResult(bool isRoot, return result; }; - auto cacheSetup = [this, &computeLambda]() { + auto cacheSetup = [this, &computeLambda, &cache, &cacheKey]() { auto result = computeLambda(); if (!result.isDataEvaluated()) { - // TODO register listeners that make sure cache size is - // properly updated + result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable) { + // TODO find out how to handle pinned entries properly. + auto sizeChange = cache.recomputeSize(cacheKey, !isShrinkable); + if (sizeChange == + ad_utility::ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE) { + return isShrinkable; + } + return false; + }); // TODO serialize into single IdTable if partially computed // all chunks fit into memory and generator is exhausted. 
} diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 519bd47598..6d65d7393f 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -30,6 +30,9 @@ class CacheValue { : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} + // TODO add destructor that clears listeners from result that might + // be messing with the cache. + const std::shared_ptr& resultTable() const { return _resultTable; } @@ -39,14 +42,8 @@ class CacheValue { // Calculates the `MemorySize` taken up by an instance of `CacheValue`. struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { - // TODO find good solution how to calculate storage requirements - // for generator data, maybe allow later re-calculation current size by - // returning lambda? - if (const auto& tablePtr = cacheValue._resultTable; - tablePtr && tablePtr->isDataEvaluated()) { - return ad_utility::MemorySize::bytes(tablePtr->idTable().size() * - tablePtr->idTable().numColumns() * - sizeof(Id)); + if (const auto& tablePtr = cacheValue._resultTable; tablePtr) { + return tablePtr->getCurrentSize(); } else { return 0_B; } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 818dd6a423..4fbb08a752 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -196,6 +196,36 @@ void Result::logResultSize() const { } } +ad_utility::MemorySize Result::getCurrentSize() const { + auto calculateSize = [](const IdTable& idTable) { + return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * + sizeof(Id)); + }; + if (isDataEvaluated()) { + return calculateSize(idTable()); + } else { + using Gen = ad_utility::ReusableGenerator; + // This should only ever get called on the "wrapped" generator stored in the + // cache. 
+ AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + ad_utility::MemorySize totalMemory = 0_B; + std::get(_idTable).forEachCachedValue( + [&totalMemory, &calculateSize](const IdTable& idTable) { + totalMemory += calculateSize(idTable); + }); + return totalMemory; + } +} + +// _____________________________________________________________________________ +void Result::setOnSizeChanged(std::function onSizeChanged) { + using Gen = ad_utility::ReusableGenerator; + // This should only ever get called on the "wrapped" generator stored in the + // cache. + AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + std::get(_idTable).setOnSizeChanged(std::move(onSizeChanged)); +} + // _____________________________________________________________________________ Result Result::createResultWithFallback(std::shared_ptr original, std::function fallback) { @@ -233,6 +263,7 @@ Result Result::createResultWithFallback(std::shared_ptr original, SharedLocalVocabWrapper{original->localVocab_}}; } +// _____________________________________________________________________________ Result Result::createResultAsMasterConsumer( std::shared_ptr original) { using Gen = ad_utility::ReusableGenerator; diff --git a/src/engine/Result.h b/src/engine/Result.h index 38904aa91f..6d65f08e8c 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,6 +15,7 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" +#include "util/MemorySize/MemorySize.h" #include "util/ReusableGenerator.h" // The result of an `Operation`. This is the class QLever uses for all @@ -178,8 +179,13 @@ class Result { // check is succesful. 
bool checkDefinedness(const VariableToColumnMap& varColMap); + ad_utility::MemorySize getCurrentSize() const; + + void setOnSizeChanged(std::function onSizeChanged); + static Result createResultWithFallback(std::shared_ptr original, std::function fallback); - static Result createResultAsMasterConsumer(std::shared_ptr original); + static Result createResultAsMasterConsumer( + std::shared_ptr original); }; diff --git a/src/util/Cache.h b/src/util/Cache.h index 648e3315d8..cfff716c20 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -31,6 +31,12 @@ using namespace ad_utility::memory_literals; static constexpr auto size_t_max = std::numeric_limits::max(); +enum class ResizeResult { + FITS_IN_CACHE, + EXCEEDS_SINGLE_ENTRY_SIZE, + EXCEEDS_MAX_SIZE +}; + /* @brief Associative array for almost arbitrary keys and values that acts as a cache with fixed memory capacity. @@ -100,6 +106,7 @@ class FlexibleCache { using AccessMap = MapType; using PinnedMap = MapType; + using SizeMap = MapType; using TryEmplaceResult = pair; @@ -173,7 +180,8 @@ class FlexibleCache { return {}; } Score s = _scoreCalculator(*valPtr); - _totalSizeNonPinned += _valueSizeGetter(*valPtr); + _totalSizeNonPinned += sizeOfNewEntry; + _sizeMap.emplace(key, sizeOfNewEntry); auto handle = _entries.insert(std::move(s), Entry(key, std::move(valPtr))); _accessMap[key] = handle; // The first value is the value part of the key-value pair in the priority @@ -203,7 +211,8 @@ class FlexibleCache { // Make room for the new entry. 
makeRoomIfFits(sizeOfNewEntry); _pinnedMap[key] = valPtr; - _totalSizePinned += _valueSizeGetter(*valPtr); + _totalSizePinned += sizeOfNewEntry; + _sizeMap.emplace(key, sizeOfNewEntry); return valPtr; } @@ -227,6 +236,48 @@ class FlexibleCache { // TODO:: implement this functionality } + ResizeResult recomputeSize(const Key& key, bool removeIfEntryGrewTooBig) { + ResizeResult result = ResizeResult::FITS_IN_CACHE; + auto applySizeDifference = [this, &key, &result, removeIfEntryGrewTooBig]( + MemorySize& variable, bool pinned) { + auto newSize = _valueSizeGetter(*(*this)[key]); + auto& oldSize = _sizeMap.at(key); + // Overflowing if oldSize > newSize is fine here, the math adds up + // nevertheless. + auto sizeDelta = newSize - oldSize; + if (newSize > oldSize) { + if (_maxSizeSingleEntry >= newSize) { + result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; + if (removeIfEntryGrewTooBig && !pinned) { + erase(key); + } + // We don't know how to shrink the size here, so if + // `removeIfEntryGrewTooBig` is false, this needs to be handled by the + // caller. + return; + } + MemorySize pinnedOffset = pinned ? 0_B : _totalSizePinned; + if (_maxSize - pinnedOffset < newSize) { + result = ResizeResult::EXCEEDS_MAX_SIZE; + // We can't fit it in the cache, so remove if not pinned + if (!pinned) { + erase(key); + } + return; + } + } + oldSize += sizeDelta; + variable += sizeDelta; + makeRoomIfFits(0_B); + }; + if (containsPinned(key)) { + applySizeDifference(_totalSizePinned, true); + } else if (containsNonPinned(key)) { + applySizeDifference(_totalSizeNonPinned, false); + } + return result; + } + //! Checks if there is an entry with the given key. 
bool contains(const Key& key) const { return containsPinned(key) || containsNonPinned(key); @@ -251,7 +302,7 @@ class FlexibleCache { const ValuePtr valuePtr = handle.value().value(); // adapt the sizes of the pinned and non-pinned part of the cache - auto sz = _valueSizeGetter(*valuePtr); + auto sz = _sizeMap.at(key); _totalSizeNonPinned -= sz; _totalSizePinned += sz; // Move the entry to the _pinnedMap and remove it from the non-pinned data @@ -267,7 +318,8 @@ class FlexibleCache { void erase(const Key& key) { const auto pinnedIt = _pinnedMap.find(key); if (pinnedIt != _pinnedMap.end()) { - _totalSizePinned -= _valueSizeGetter(*pinnedIt->second); + _totalSizePinned -= _sizeMap.at(key); + _sizeMap.erase(key); _pinnedMap.erase(pinnedIt); return; } @@ -278,7 +330,8 @@ class FlexibleCache { return; } // the entry exists in the non-pinned part of the cache, erase it. - _totalSizeNonPinned -= _valueSizeGetter(*mapIt->second); + _totalSizeNonPinned -= _sizeMap.at(key); + _sizeMap.erase(key); _entries.erase(std::move(mapIt->second)); _accessMap.erase(mapIt); } @@ -385,8 +438,8 @@ class FlexibleCache { void removeOneEntry() { AD_CONTRACT_CHECK(!_entries.empty()); auto handle = _entries.pop(); - _totalSizeNonPinned = - _totalSizeNonPinned - _valueSizeGetter(*handle.value().value()); + _totalSizeNonPinned -= _sizeMap.at(handle.value().key()); + _sizeMap.erase(handle.value().key()); _accessMap.erase(handle.value().key()); } size_t _maxNumEntries; @@ -402,6 +455,7 @@ class FlexibleCache { ValueSizeGetter _valueSizeGetter; PinnedMap _pinnedMap; AccessMap _accessMap; + SizeMap _sizeMap; }; // Partial instantiation of FlexibleCache using the heap-based priority queue diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index bb195e56be..7c3f37e035 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -197,6 +197,11 @@ class ConcurrentCache { return computeOnceImpl(true, key, computeFunction, onlyReadFromCache); } + auto recomputeSize(const 
Key& key, bool removeIfEntryGrewTooBig) { + return _cacheAndInProgressMap.wlock()->_cache.recomputeSize( + key, removeIfEntryGrewTooBig); + } + /// Clear the cache (but not the pinned entries) void clearUnpinnedOnly() { _cacheAndInProgressMap.wlock()->_cache.clearUnpinnedOnly(); diff --git a/src/util/ReusableGenerator.h b/src/util/ReusableGenerator.h index 9919cef969..9df17af099 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/ReusableGenerator.h @@ -36,7 +36,9 @@ class ReusableGenerator { std::optional generatorIterator_{}; std::vector> cachedValues_{}; MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; - std::function onSizeChanged_{}; + // Returns true if cache needs to shrink, accepts a parameter that tells the + // callback if we actually can shrink + std::function onSizeChanged_{}; std::function onGeneratorFinished_{}; public: @@ -83,15 +85,9 @@ class ReusableGenerator { } if (generatorIterator_.value() != generator_.end()) { cachedValues_.emplace_back(std::move(*generatorIterator_.value())); - // False on onSizeChange means the value got too big. 
- if (onSizeChanged_ && !onSizeChanged_()) { - for (size_t i = 0; i < cachedValues_.size() - 1; i++) { - if (cachedValues_.at(i).has_value()) { - cachedValues_.at(i).reset(); - if (onSizeChanged_()) { - break; - } - } + if (onSizeChanged_) { + if (onSizeChanged_(true)) { + tryShrinkCache(); } } } else if (onGeneratorFinished_) { @@ -125,7 +121,7 @@ class ReusableGenerator { conditionVariable_.notify_all(); } - void setOnSizeChanged(std::function onSizeChanged) { + void setOnSizeChanged(std::function onSizeChanged) { std::lock_guard lock{mutex_}; onSizeChanged_ = std::move(onSizeChanged); } @@ -144,6 +140,23 @@ class ReusableGenerator { } } } + + void tryShrinkCache() { + size_t maxBound = cachedValues_.size() - 1; + for (size_t i = 0; i < maxBound; i++) { + if (cachedValues_.at(i).has_value()) { + cachedValues_.at(i).reset(); + if (onSizeChanged_) { + bool isShrinkable = i < maxBound - 1; + if (onSizeChanged_(isShrinkable)) { + AD_CONTRACT_CHECK(!isShrinkable); + } else { + break; + } + } + } + } + } }; std::shared_ptr computationStorage_; @@ -228,6 +241,14 @@ class ReusableGenerator { computationStorage_.reset(); return result; } + + void forEachCachedValue(const std::invocable auto& function) const { + computationStorage_->forEachCachedValue(function); + } + + void setOnSizeChanged(std::function onSizeChanged) { + computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); + } }; }; // namespace ad_utility diff --git a/src/util/UniqueCleanup.h b/src/util/UniqueCleanup.h index d8e41fb75c..77305029ad 100644 --- a/src/util/UniqueCleanup.h +++ b/src/util/UniqueCleanup.h @@ -7,7 +7,6 @@ #include #include -#include #include "util/ResetWhenMoved.h" From 771eb5bba8717443d5893fb75f93d3e94ca176b4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 12 May 2024 22:17:59 +0200 Subject: [PATCH 024/133] Refactor a bit --- src/engine/Join.cpp | 5 +++-- src/engine/Operation.cpp | 15 ++++++++------- src/engine/Operation.h | 17 
++++++++++------- src/engine/QueryExecutionContext.h | 10 +++++++--- src/engine/QueryExecutionTree.h | 4 +++- src/engine/Result.cpp | 8 ++++---- src/engine/Result.h | 4 ++-- ...ReusableGenerator.h => CacheableGenerator.h} | 16 +++++++--------- test/OperationTest.cpp | 8 ++++---- 9 files changed, 48 insertions(+), 39 deletions(-) rename src/util/{ReusableGenerator.h => CacheableGenerator.h} (94%) diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 89863a9c51..e2b597d234 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -122,8 +122,9 @@ Result Join::computeResult([[maybe_unused]] bool requestLaziness) { // The third argument means "only get the result if it can be read from the // cache". So effectively, this returns the result if it is small, contains // UNDEF values, or is contained in the cache, otherwise `nullptr`. - return tree.getRootOperation()->getResult(false, - !(isSmall || containsUndef)); + return tree.getRootOperation()->getResult( + false, (isSmall || containsUndef) ? 
ComputationMode::CACHE_ONLY + : ComputationMode::FULL); }; auto leftResIfCached = getCachedOrSmallResult(*_left, _leftJoinCol); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index d6ac130593..fa72723ebd 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -70,10 +70,8 @@ void Operation::recursivelySetTimeConstraint( } // ________________________________________________________________________ -std::shared_ptr Operation::getResult(bool isRoot, - bool onlyReadFromCache, - bool requestLaziness) { - AD_CONTRACT_CHECK(!onlyReadFromCache || !requestLaziness); +std::shared_ptr Operation::getResult( + bool isRoot, ComputationMode computationMode) { ad_utility::Timer timer{ad_utility::Timer::Started}; if (isRoot) { @@ -123,13 +121,14 @@ std::shared_ptr Operation::getResult(bool isRoot, } }); bool actuallyComputed = false; - auto computeLambda = [this, &timer, requestLaziness, &actuallyComputed] { + auto computeLambda = [this, &timer, computationMode, &actuallyComputed] { checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); - Result result = computeResult(requestLaziness); + Result result = computeResult(computationMode == ComputationMode::LAZY); actuallyComputed = true; - AD_CONTRACT_CHECK(requestLaziness || result.isDataEvaluated()); + AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY || + result.isDataEvaluated()); checkCancellation(); // Compute the datatypes that occur in each column of the result. @@ -185,6 +184,8 @@ std::shared_ptr Operation::getResult(bool isRoot, return CacheValue{std::move(result), runtimeInfo()}; }; + bool onlyReadFromCache = computationMode == ComputationMode::CACHE_ONLY; + auto result = pinResult ? 
cache.computeOncePinned(cacheKey, cacheSetup, onlyReadFromCache) diff --git a/src/engine/Operation.h b/src/engine/Operation.h index fd3a66d5da..665c0fb3f2 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -21,6 +21,8 @@ // forward declaration needed to break dependencies class QueryExecutionTree; +enum class ComputationMode { FULL, CACHE_ONLY, LAZY }; + class Operation { using SharedCancellationHandle = ad_utility::SharedCancellationHandle; using Milliseconds = std::chrono::milliseconds; @@ -144,15 +146,17 @@ class Operation { * @param isRoot Has be set to `true` iff this is the root operation of a * complete query to obtain the expected behavior wrt cache pinning and * runtime information in error cases. - * @param onlyReadFromCache If set to true the result is only returned if it - * can be read from the cache without any computation. If the result is not in - * the cache, `nullptr` will be returned. + * @param computationMode If set to `CACHE_ONLY` the result is only returned + * if it can be read from the cache without any computation. If the result is + * not in the cache, `nullptr` will be returned. If set to `LAZY` this will + * request the result to be computable at request in chunks. If the operation + * does not support this, it will do nothing. * @return A shared pointer to the result. May only be `nullptr` if * `onlyReadFromCache` is true. */ - std::shared_ptr getResult(bool isRoot = false, - bool onlyReadFromCache = false, - bool requestLaziness = false); + std::shared_ptr getResult( + bool isRoot = false, + ComputationMode computationMode = ComputationMode::FULL); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). As soon as one child is aborted, the whole @@ -249,7 +253,6 @@ class Operation { private: //! Compute the result of the query-subtree rooted at this element.. - // TODO turn bool into enum maybe? 
virtual Result computeResult(bool requestLaziness) = 0; // Create and store the complete runtime information for this operation after diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 6d65d7393f..3c2b0884d1 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -30,15 +30,19 @@ class CacheValue { : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} - // TODO add destructor that clears listeners from result that might - // be messing with the cache. - const std::shared_ptr& resultTable() const { return _resultTable; } const RuntimeInformation& runtimeInfo() const { return _runtimeInfo; } + ~CacheValue() { + if (!_resultTable->isDataEvaluated()) { + // Clear listeners + const_cast(*_resultTable).setOnSizeChanged({}); + } + } + // Calculates the `MemorySize` taken up by an instance of `CacheValue`. struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index f54cf83c3c..64d94e0093 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -52,7 +52,9 @@ class QueryExecutionTree { size_t getResultWidth() const { return rootOperation_->getResultWidth(); } std::shared_ptr getResult(bool requestLaziness = false) const { - return rootOperation_->getResult(isRoot(), false, requestLaziness); + return rootOperation_->getResult(isRoot(), requestLaziness + ? 
ComputationMode::LAZY + : ComputationMode::FULL); } // A variable, its column index in the Id space result, and the `ResultType` diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 4fbb08a752..e2363f303b 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -94,7 +94,7 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // modification here AD_CONTRACT_CHECK( !std::holds_alternative>(_idTable)); - using Gen = ad_utility::ReusableGenerator; + using Gen = ad_utility::CacheableGenerator; if (std::holds_alternative(_idTable)) { modifyIdTable(std::get(_idTable), limitOffset); } else if (std::holds_alternative(_idTable)) { @@ -204,7 +204,7 @@ ad_utility::MemorySize Result::getCurrentSize() const { if (isDataEvaluated()) { return calculateSize(idTable()); } else { - using Gen = ad_utility::ReusableGenerator; + using Gen = ad_utility::CacheableGenerator; // This should only ever get called on the "wrapped" generator stored in the // cache. AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); @@ -219,7 +219,7 @@ ad_utility::MemorySize Result::getCurrentSize() const { // _____________________________________________________________________________ void Result::setOnSizeChanged(std::function onSizeChanged) { - using Gen = ad_utility::ReusableGenerator; + using Gen = ad_utility::CacheableGenerator; // This should only ever get called on the "wrapped" generator stored in the // cache. 
AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); @@ -266,7 +266,7 @@ Result Result::createResultWithFallback(std::shared_ptr original, // _____________________________________________________________________________ Result Result::createResultAsMasterConsumer( std::shared_ptr original) { - using Gen = ad_utility::ReusableGenerator; + using Gen = ad_utility::CacheableGenerator; AD_CONTRACT_CHECK(std::holds_alternative(original->_idTable)); auto generator = [](auto original) -> cppcoro::generator { using ad_utility::IteratorWrapper; diff --git a/src/engine/Result.h b/src/engine/Result.h index 6d65f08e8c..09f1afe97f 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,8 +15,8 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" +#include "util/CacheableGenerator.h" #include "util/MemorySize/MemorySize.h" -#include "util/ReusableGenerator.h" // The result of an `Operation`. This is the class QLever uses for all // intermediate or final results when processing a SPARQL query. The actual data @@ -24,7 +24,7 @@ class Result { private: using TableType = - std::variant, + std::variant, cppcoro::generator>; // The actual entries. Since generators need to be modified // in order to be consumed, this needs to be mutable. diff --git a/src/util/ReusableGenerator.h b/src/util/CacheableGenerator.h similarity index 94% rename from src/util/ReusableGenerator.h rename to src/util/CacheableGenerator.h index 9df17af099..37b12459a3 100644 --- a/src/util/ReusableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -18,10 +18,8 @@ namespace ad_utility { class IteratorExpired : std::exception {}; -// TODO Rename this class to cache-aware generator or something. 
- template -class ReusableGenerator { +class CacheableGenerator { using GenIterator = typename cppcoro::generator::iterator; using Reference = const T&; using Pointer = const T*; @@ -29,7 +27,7 @@ class ReusableGenerator { enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; class ComputationStorage { - friend ReusableGenerator; + friend CacheableGenerator; mutable std::shared_mutex mutex_; std::condition_variable_any conditionVariable_; cppcoro::generator generator_; @@ -161,14 +159,14 @@ class ReusableGenerator { std::shared_ptr computationStorage_; public: - explicit ReusableGenerator(cppcoro::generator generator) + explicit CacheableGenerator(cppcoro::generator generator) : computationStorage_{ std::make_shared(std::move(generator))} {} - ReusableGenerator(ReusableGenerator&& other) = default; - ReusableGenerator(const ReusableGenerator& other) = delete; - ReusableGenerator& operator=(ReusableGenerator&& other) = default; - ReusableGenerator& operator=(const ReusableGenerator& other) = delete; + CacheableGenerator(CacheableGenerator&& other) = default; + CacheableGenerator(const CacheableGenerator& other) = delete; + CacheableGenerator& operator=(CacheableGenerator&& other) = default; + CacheableGenerator& operator=(const CacheableGenerator& other) = delete; class IteratorSentinel {}; diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 8ad6a67d37..d44d331332 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -39,7 +39,7 @@ TEST(OperationTest, getResultOnlyCached) { NeutralElementOperation n{qec}; // The second `true` means "only read the result if it was cached". // We have just cleared the cache, and so this should return `nullptr`. - EXPECT_EQ(n.getResult(true, true), nullptr); + EXPECT_EQ(n.getResult(true, ComputationMode::CACHE_ONLY), nullptr); EXPECT_EQ(n.runtimeInfo().status_, RuntimeInformation::Status::notStarted); // Nothing has been stored in the cache by this call. 
EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 0); @@ -58,7 +58,7 @@ TEST(OperationTest, getResultOnlyCached) { // When we now request to only return the result if it is cached, we should // get exactly the same `shared_ptr` as with the previous call. NeutralElementOperation n3{qec}; - EXPECT_EQ(n3.getResult(true, true), result); + EXPECT_EQ(n3.getResult(true, ComputationMode::CACHE_ONLY), result); EXPECT_EQ(n3.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedNotPinned); @@ -67,7 +67,7 @@ TEST(OperationTest, getResultOnlyCached) { QueryExecutionContext qecCopy{*qec}; qecCopy._pinResult = true; NeutralElementOperation n4{&qecCopy}; - EXPECT_EQ(n4.getResult(true, true), result); + EXPECT_EQ(n4.getResult(true, ComputationMode::CACHE_ONLY), result); // The cache status is `cachedNotPinned` because we found the element cached // but not pinned (it does reflect the status BEFORE the operation). @@ -79,7 +79,7 @@ TEST(OperationTest, getResultOnlyCached) { // We have pinned the result, so requesting it again should return a pinned // result. 
qecCopy._pinResult = false; - EXPECT_EQ(n4.getResult(true, true), result); + EXPECT_EQ(n4.getResult(true, ComputationMode::CACHE_ONLY), result); EXPECT_EQ(n4.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedPinned); From 8aa9060c24a405dc64ebce6b098731fb77399b1b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 13 May 2024 00:29:52 +0200 Subject: [PATCH 025/133] Aggregate tables at the end of lazy results --- src/engine/Operation.cpp | 11 ++++++-- src/engine/QueryExecutionContext.h | 1 + src/engine/Result.cpp | 40 +++++++++++++++++++++++++++++- src/engine/Result.h | 4 +++ src/util/Cache.h | 19 ++++++++++++++ src/util/CacheableGenerator.h | 5 ++++ src/util/ConcurrentCache.h | 8 ++++++ 7 files changed, 85 insertions(+), 3 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index fa72723ebd..836385c4b8 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -178,8 +178,15 @@ std::shared_ptr Operation::getResult( } return false; }); - // TODO serialize into single IdTable if partially computed - // all chunks fit into memory and generator is exhausted. 
+ result.setOnGeneratorFinished( + [&cache, cacheKey](bool isComplete) mutable { + if (isComplete) { + cache.transformValue(cacheKey, [](const CacheValue& oldValue) { + return CacheValue{oldValue.resultTable()->aggregateTable(), + oldValue.runtimeInfo()}; + }); + } + }); } return CacheValue{std::move(result), runtimeInfo()}; }; diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 3c2b0884d1..423f17f346 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -40,6 +40,7 @@ class CacheValue { if (!_resultTable->isDataEvaluated()) { // Clear listeners const_cast(*_resultTable).setOnSizeChanged({}); + const_cast(*_resultTable).setOnGeneratorFinished({}); } } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index e2363f303b..f0169e6b0f 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -42,7 +42,8 @@ Result::Result(TableType idTable, std::vector sortedBy, _sortedBy{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); - // TODO move checks into generators if possible + // TODO move checks into generators if possible, check all usages of + // isDataEvaluated AD_CONTRACT_CHECK(!isDataEvaluated() || std::ranges::all_of(_sortedBy, [this](size_t numCols) { return numCols < this->idTable().numColumns(); @@ -226,6 +227,43 @@ void Result::setOnSizeChanged(std::function onSizeChanged) { std::get(_idTable).setOnSizeChanged(std::move(onSizeChanged)); } +// _____________________________________________________________________________ +void Result::setOnGeneratorFinished( + std::function onGeneratorFinished) { + using Gen = ad_utility::CacheableGenerator; + // This should only ever get called on the "wrapped" generator stored in the + // cache. 
+ AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + std::get(_idTable).setOnGeneratorFinished( + std::move(onGeneratorFinished)); +} + +Result Result::aggregateTable() const { + using Gen = ad_utility::CacheableGenerator; + AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + size_t totalRows = 0; + size_t numCols = 0; + std::optional allocator; + std::get(_idTable).forEachCachedValue( + [&totalRows, &numCols, &allocator](const IdTable& table) { + totalRows += table.numRows(); + if (numCols == 0) { + numCols = table.numColumns(); + } + if (!allocator.has_value()) { + allocator = table.getAllocator(); + } + }); + IdTable idTable{ + numCols, std::move(allocator).value_or(makeAllocatorWithLimit(0_B))}; + idTable.reserve(totalRows); + std::get(_idTable).forEachCachedValue([&idTable](const IdTable& table) { + idTable.insertAtEnd(table.begin(), table.end()); + }); + return Result{std::move(idTable), _sortedBy, + SharedLocalVocabWrapper{localVocab_}}; +} + // _____________________________________________________________________________ Result Result::createResultWithFallback(std::shared_ptr original, std::function fallback) { diff --git a/src/engine/Result.h b/src/engine/Result.h index 09f1afe97f..ba23e2a6ab 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -183,6 +183,10 @@ class Result { void setOnSizeChanged(std::function onSizeChanged); + void setOnGeneratorFinished(std::function onGeneratorFinished); + + Result aggregateTable() const; + static Result createResultWithFallback(std::shared_ptr original, std::function fallback); diff --git a/src/util/Cache.h b/src/util/Cache.h index cfff716c20..9664a9e561 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -278,6 +278,25 @@ class FlexibleCache { return result; } + void transformValue( + const Key& key, + const InvocableWithExactReturnType auto& + transformer) { + bool pinned = false; + if (containsPinned(key)) { + pinned = true; + } else if (!containsNonPinned(key)) { + return; + } + auto 
transformedValue = transformer(*(*this)[key]); + erase(key); + if (pinned) { + insertPinned(key, std::move(transformedValue)); + } else { + insert(key, std::move(transformedValue)); + } + } + //! Checks if there is an entry with the given key. bool contains(const Key& key) const { return containsPinned(key) || containsNonPinned(key); diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 37b12459a3..952ac58978 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -75,6 +75,7 @@ class CacheableGenerator { return; } } + // TODO track processing time to update stats. if (generatorIterator_.has_value()) { AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); ++generatorIterator_.value(); @@ -247,6 +248,10 @@ class CacheableGenerator { void setOnSizeChanged(std::function onSizeChanged) { computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); } + + void setOnGeneratorFinished(std::function onGeneratorFinished) { + computationStorage_->setOnGeneratorFinished(std::move(onGeneratorFinished)); + } }; }; // namespace ad_utility diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 7c3f37e035..7b46e6e528 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -202,6 +202,14 @@ class ConcurrentCache { key, removeIfEntryGrewTooBig); } + void transformValue( + const Key& key, + const InvocableWithExactReturnType auto& + transformer) { + return _cacheAndInProgressMap.wlock()->_cache.transformValue(key, + transformer); + } + /// Clear the cache (but not the pinned entries) void clearUnpinnedOnly() { _cacheAndInProgressMap.wlock()->_cache.clearUnpinnedOnly(); From b499c6e3feea128cbf1bdc8b0695448db63bd141 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 17 May 2024 17:49:51 +0200 Subject: [PATCH 026/133] Overload constructor of Result class --- src/engine/Operation.cpp | 5 +- src/engine/Result.cpp | 147 
++++++++++++++++++++++++++------------- src/engine/Result.h | 28 +++++--- 3 files changed, 118 insertions(+), 62 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 836385c4b8..5248517f7a 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -159,9 +159,8 @@ std::shared_ptr Operation::getResult( // runtime if neither a LIMIT nor an OFFSET were specified. result.applyLimitOffset(_limit); runtimeInfo().addLimitOffsetRow(_limit, limitTimer.msecs(), true); - } else if (result.isDataEvaluated()) { - AD_CONTRACT_CHECK(result.idTable().numRows() == - _limit.actualSize(result.idTable().numRows())); + } else { + result.enforceLimitOffset(_limit); } return result; }; diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index f0169e6b0f..74102c6e62 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -36,38 +36,68 @@ auto Result::getMergedLocalVocab(const Result& resultTable1, LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } // _____________________________________________________________________________ -Result::Result(TableType idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : _idTable{std::move(idTable)}, - _sortedBy{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); - // TODO move checks into generators if possible, check all usages of - // isDataEvaluated - AD_CONTRACT_CHECK(!isDataEvaluated() || - std::ranges::all_of(_sortedBy, [this](size_t numCols) { - return numCols < this->idTable().numColumns(); - })); +void Result::validateIdTable(const IdTable& idTable) const { + AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy_, [&idTable](size_t numCols) { + return numCols < idTable.numColumns(); + })); [[maybe_unused]] auto compareRowsByJoinColumns = [this](const auto& row1, const auto& row2) { - for (size_t col : this->sortedBy()) { + for (size_t col : sortedBy_) { if (row1[col] != 
row2[col]) { return row1[col] < row2[col]; } } return false; }; - AD_EXPENSIVE_CHECK( - !isDataEvaluated() || - std::ranges::is_sorted(this->idTable(), compareRowsByJoinColumns)); + AD_EXPENSIVE_CHECK(std::ranges::is_sorted(idTable, compareRowsByJoinColumns)); +} + +// _____________________________________________________________________________ +Result::Result(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : data_{std::move(idTable)}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab.localVocab_)} { + AD_CONTRACT_CHECK(localVocab_ != nullptr); + validateIdTable(std::get(data_)); } // _____________________________________________________________________________ -Result::Result(TableType idTable, std::vector sortedBy, +Result::Result(IdTable idTable, std::vector sortedBy, LocalVocab&& localVocab) - : Result(std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}) {} + : Result{std::move(idTable), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} + +// _____________________________________________________________________________ +Result::Result(cppcoro::generator idTables, + std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : data_{ad_utility::CacheableGenerator{ + [this, idTables = std::move( + idTables)]() mutable -> cppcoro::generator { + for (IdTable& idTable : idTables) { + validateIdTable(idTable); + co_yield std::move(idTable); + } + }()}}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab.localVocab_)} { + AD_CONTRACT_CHECK(localVocab_ != nullptr); +} + +// _____________________________________________________________________________ +Result::Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocab&& localVocab) + : Result{std::move(idTables), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} + +// _____________________________________________________________________________ 
+Result::Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocabPtr localVocab) + : data_{std::move(idTables)}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab)} {} // _____________________________________________________________________________ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { @@ -85,20 +115,17 @@ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { idTable.shrinkToFit(); } -// TODO add unit tests for this // _____________________________________________________________________________ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). - // TODO make limit its own dedicated operation to avoid this - // modification here AD_CONTRACT_CHECK( - !std::holds_alternative>(_idTable)); + !std::holds_alternative>(data_)); using Gen = ad_utility::CacheableGenerator; - if (std::holds_alternative(_idTable)) { - modifyIdTable(std::get(_idTable), limitOffset); - } else if (std::holds_alternative(_idTable)) { + if (std::holds_alternative(data_)) { + modifyIdTable(std::get(data_), limitOffset); + } else if (std::holds_alternative(data_)) { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset) -> cppcoro::generator { @@ -120,8 +147,32 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { break; } } - }(std::move(std::get(_idTable)).extractGenerator(), limitOffset); - _idTable.emplace(std::move(generator)); + }(std::move(std::get(data_)).extractGenerator(), limitOffset); + data_.emplace(std::move(generator)); + } else { + AD_FAIL(); + } +} + +// _____________________________________________________________________________ +void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { + AD_CONTRACT_CHECK( + 
!std::holds_alternative>(data_)); + using Gen = ad_utility::CacheableGenerator; + if (std::holds_alternative(data_)) { + AD_CONTRACT_CHECK(idTable().numRows() == + limitOffset.actualSize(idTable().numRows())); + } else if (std::holds_alternative(data_)) { + auto generator = + [](cppcoro::generator original, + LimitOffsetClause limitOffset) -> cppcoro::generator { + for (auto&& idTable : original) { + AD_CONTRACT_CHECK(idTable.numRows() == + limitOffset.actualSize(idTable.numRows())); + co_yield std::move(idTable); + } + }(std::move(std::get(data_)).extractGenerator(), limitOffset); + data_.emplace(std::move(generator)); } else { AD_FAIL(); } @@ -133,7 +184,7 @@ auto Result::getOrComputeDatatypeCountsPerColumn() if (datatypeCountsPerColumn_.has_value()) { return datatypeCountsPerColumn_.value(); } - auto& idTable = std::get(_idTable); + auto& idTable = std::get(data_); auto& types = datatypeCountsPerColumn_.emplace(); types.resize(idTable.numColumns()); for (size_t i = 0; i < idTable.numColumns(); ++i) { @@ -162,7 +213,7 @@ bool Result::checkDefinedness(const VariableToColumnMap& varColMap) { // _____________________________________________________________________________ const IdTable& Result::idTable() const { AD_CONTRACT_CHECK(isDataEvaluated()); - return std::get(_idTable); + return std::get(data_); } // _____________________________________________________________________________ @@ -179,12 +230,12 @@ cppcoro::generator Result::idTables() const { AD_FAIL(); } }, - _idTable); + data_); } // _____________________________________________________________________________ bool Result::isDataEvaluated() const { - return std::holds_alternative(_idTable); + return std::holds_alternative(data_); } // _____________________________________________________________________________ @@ -208,9 +259,9 @@ ad_utility::MemorySize Result::getCurrentSize() const { using Gen = ad_utility::CacheableGenerator; // This should only ever get called on the "wrapped" generator stored in the 
// cache. - AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + AD_CONTRACT_CHECK(std::holds_alternative(data_)); ad_utility::MemorySize totalMemory = 0_B; - std::get(_idTable).forEachCachedValue( + std::get(data_).forEachCachedValue( [&totalMemory, &calculateSize](const IdTable& idTable) { totalMemory += calculateSize(idTable); }); @@ -223,8 +274,8 @@ void Result::setOnSizeChanged(std::function onSizeChanged) { using Gen = ad_utility::CacheableGenerator; // This should only ever get called on the "wrapped" generator stored in the // cache. - AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); - std::get(_idTable).setOnSizeChanged(std::move(onSizeChanged)); + AD_CONTRACT_CHECK(std::holds_alternative(data_)); + std::get(data_).setOnSizeChanged(std::move(onSizeChanged)); } // _____________________________________________________________________________ @@ -233,18 +284,17 @@ void Result::setOnGeneratorFinished( using Gen = ad_utility::CacheableGenerator; // This should only ever get called on the "wrapped" generator stored in the // cache. 
- AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); - std::get(_idTable).setOnGeneratorFinished( - std::move(onGeneratorFinished)); + AD_CONTRACT_CHECK(std::holds_alternative(data_)); + std::get(data_).setOnGeneratorFinished(std::move(onGeneratorFinished)); } Result Result::aggregateTable() const { using Gen = ad_utility::CacheableGenerator; - AD_CONTRACT_CHECK(std::holds_alternative(_idTable)); + AD_CONTRACT_CHECK(std::holds_alternative(data_)); size_t totalRows = 0; size_t numCols = 0; std::optional allocator; - std::get(_idTable).forEachCachedValue( + std::get(data_).forEachCachedValue( [&totalRows, &numCols, &allocator](const IdTable& table) { totalRows += table.numRows(); if (numCols == 0) { @@ -257,10 +307,10 @@ Result Result::aggregateTable() const { IdTable idTable{ numCols, std::move(allocator).value_or(makeAllocatorWithLimit(0_B))}; idTable.reserve(totalRows); - std::get(_idTable).forEachCachedValue([&idTable](const IdTable& table) { + std::get(data_).forEachCachedValue([&idTable](const IdTable& table) { idTable.insertAtEnd(table.begin(), table.end()); }); - return Result{std::move(idTable), _sortedBy, + return Result{std::move(idTable), sortedBy_, SharedLocalVocabWrapper{localVocab_}}; } @@ -297,22 +347,21 @@ Result Result::createResultWithFallback(std::shared_ptr original, } }; return Result{generator(std::move(original), std::move(fallback)), - original->_sortedBy, - SharedLocalVocabWrapper{original->localVocab_}}; + original->sortedBy_, original->localVocab_}; } // _____________________________________________________________________________ Result Result::createResultAsMasterConsumer( std::shared_ptr original) { using Gen = ad_utility::CacheableGenerator; - AD_CONTRACT_CHECK(std::holds_alternative(original->_idTable)); + AD_CONTRACT_CHECK(std::holds_alternative(original->data_)); auto generator = [](auto original) -> cppcoro::generator { using ad_utility::IteratorWrapper; - auto& generator = std::get(original->_idTable); + auto& generator = 
std::get(original->data_); for (const IdTable& idTable : IteratorWrapper{generator, true}) { co_yield idTable; } }; - return Result{generator(std::move(original)), original->_sortedBy, - SharedLocalVocabWrapper{original->localVocab_}}; + return Result{generator(std::move(original)), original->sortedBy_, + original->localVocab_}; } diff --git a/src/engine/Result.h b/src/engine/Result.h index ba23e2a6ab..cac6f2fc89 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -23,16 +23,15 @@ // is always a table and contained in the member `idTable()`. class Result { private: - using TableType = - std::variant, - cppcoro::generator>; + using Data = std::variant, + cppcoro::generator>; // The actual entries. Since generators need to be modified // in order to be consumed, this needs to be mutable. - mutable TableType _idTable; + mutable Data data_; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. - std::vector _sortedBy; + std::vector sortedBy_; using LocalVocabPtr = std::shared_ptr; // The local vocabulary of the result. @@ -74,6 +73,10 @@ class Result { using DatatypeCountsPerColumn = std::vector< std::array(Datatype::MaxValue) + 1>>; std::optional datatypeCountsPerColumn_; + Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocabPtr localVocab); + + void validateIdTable(const IdTable& idTable) const; public: // Construct from the given arguments (see above) and check the following @@ -85,11 +88,14 @@ class Result { // The first overload of the constructor is for local vocabs that are shared // with another `Result` via the `getSharedLocalVocab...` methods below. // The second overload is for newly created local vocabularies. 
- Result(TableType idTable, std::vector sortedBy, + Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab); - Result(TableType idTable, std::vector sortedBy, + Result(IdTable idTable, std::vector sortedBy, LocalVocab&& localVocab); - // TODO add better overloads than using TableType + Result(cppcoro::generator idTables, + std::vector sortedBy, SharedLocalVocabWrapper localVocab); + Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocab&& localVocab); // Prevent accidental copying of a result table. Result(const Result& other) = delete; @@ -109,7 +115,7 @@ class Result { cppcoro::generator idTables() const; // Const access to the columns by which the `idTable()` is sorted. - const std::vector& sortedBy() const { return _sortedBy; } + const std::vector& sortedBy() const { return sortedBy_; } // Get the local vocabulary of this result, used for lookup only. // @@ -168,6 +174,8 @@ class Result { // those are still correct after performing this operation. void applyLimitOffset(const LimitOffsetClause& limitOffset); + void enforceLimitOffset(const LimitOffsetClause& limitOffset); + // Get the information, which columns stores how many entries of each // datatype. This information is computed on the first call to this function // `O(num-entries-in-table)` and then cached for subsequent usages. @@ -175,7 +183,7 @@ class Result { // Check that if the `varColMap` guarantees that a column is always defined // (i.e. that is contains no single undefined value) that there are indeed no - // undefined values in the `_idTable` of this result. Return `true` iff the + // undefined values in the `data_` of this result. Return `true` iff the // check is succesful. 
bool checkDefinedness(const VariableToColumnMap& varColMap); From 6b3f05cbc49ce5de3f1e7da01d8e6bf0616ac522 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 02:25:42 +0200 Subject: [PATCH 027/133] Try to properly calculate duration --- src/engine/Operation.cpp | 45 +++++++++++++++------ src/engine/QueryExecutionContext.h | 1 + src/engine/Result.cpp | 63 ++++++++++++++++++++++++------ src/engine/Result.h | 15 +++++-- src/util/CacheableGenerator.h | 21 +++++++++- 5 files changed, 116 insertions(+), 29 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 5248517f7a..b4b084a5f8 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -147,18 +147,26 @@ std::shared_ptr Operation::getResult( // correct runtimeInfo. The children of the runtime info are already set // correctly because the result was computed, so we can pass `nullopt` as // the last argument. - updateRuntimeInformationOnSuccess(result, - ad_utility::CacheStatus::computed, - timer.msecs(), std::nullopt); + if (result.isDataEvaluated()) { + updateRuntimeInformationOnSuccess(result, + ad_utility::CacheStatus::computed, + timer.msecs(), std::nullopt); + } else { + // TODO check if this is sufficient here or we need more of + // `updateRuntimeInformationOnSuccess` functionality here. + runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; + } // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not // already perform it. An example for an operation that directly computes // the Limit is a full index scan with three variables. if (!supportsLimit()) { - ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - // Note: both of the following calls have no effect and negligible - // runtime if neither a LIMIT nor an OFFSET were specified. 
- result.applyLimitOffset(_limit); - runtimeInfo().addLimitOffsetRow(_limit, limitTimer.msecs(), true); + runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, + true); + result.applyLimitOffset(_limit, + [runtimeInfo = getRuntimeInfoPointer()]( + std::chrono::milliseconds limitTime) { + runtimeInfo->totalTime_ += limitTime; + }); } else { result.enforceLimitOffset(_limit); } @@ -186,6 +194,10 @@ std::shared_ptr Operation::getResult( }); } }); + result.setOnNextChunkComputed([runtimeInfo = getRuntimeInfoPointer()]( + std::chrono::milliseconds duration) { + runtimeInfo->totalTime_ += duration; + }); } return CacheValue{std::move(result), runtimeInfo()}; }; @@ -202,7 +214,10 @@ std::shared_ptr Operation::getResult( return nullptr; } - updateRuntimeInformationOnSuccess(result, timer.msecs()); + if (result._resultPointer->resultTable()->isDataEvaluated()) { + updateRuntimeInformationOnSuccess(result, timer.msecs()); + } + if (result._resultPointer->resultTable()->isDataEvaluated()) { auto resultNumRows = result._resultPointer->resultTable()->idTable().size(); @@ -217,10 +232,18 @@ std::shared_ptr Operation::getResult( } else if (actuallyComputed) { return std::make_shared( Result::createResultAsMasterConsumer( - result._resultPointer->resultTable())); + result._resultPointer->resultTable(), + isRoot ? 
std::function{[this]() { signalQueryUpdate(); }} + : std::function{})); } return std::make_shared(Result::createResultWithFallback( - result._resultPointer->resultTable(), std::move(computeLambda))); + result._resultPointer->resultTable(), std::move(computeLambda), + [this, isRoot](auto duration) { + runtimeInfo().totalTime_ += duration; + if (isRoot) { + signalQueryUpdate(); + } + })); } catch (ad_utility::CancellationException& e) { e.setOperation(getDescriptor()); runtimeInfo().status_ = RuntimeInformation::Status::cancelled; diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 423f17f346..746014deb9 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -41,6 +41,7 @@ class CacheValue { // Clear listeners const_cast(*_resultTable).setOnSizeChanged({}); const_cast(*_resultTable).setOnGeneratorFinished({}); + const_cast(*_resultTable).setOnNextChunkComputed({}); } } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 74102c6e62..31f161b55a 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -10,6 +10,7 @@ #include "util/Exception.h" #include "util/IteratorWrapper.h" #include "util/Log.h" +#include "util/Timer.h" // _____________________________________________________________________________ string Result::asDebugString() const { @@ -116,7 +117,9 @@ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { } // _____________________________________________________________________________ -void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { +void Result::applyLimitOffset( + const LimitOffsetClause& limitOffset, + std::function limitTimeCallback) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
@@ -124,15 +127,19 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { !std::holds_alternative>(data_)); using Gen = ad_utility::CacheableGenerator; if (std::holds_alternative(data_)) { + ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; modifyIdTable(std::get(data_), limitOffset); + limitTimeCallback(limitTimer.msecs()); } else if (std::holds_alternative(data_)) { auto generator = - [](cppcoro::generator original, - LimitOffsetClause limitOffset) -> cppcoro::generator { + [](cppcoro::generator original, LimitOffsetClause limitOffset, + std::function limitTimeCallback) + -> cppcoro::generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } for (auto&& idTable : original) { + ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; modifyIdTable(idTable, limitOffset); uint64_t offsetDelta = limitOffset.actualOffset(idTable.numRows()); limitOffset._offset -= offsetDelta; @@ -140,6 +147,7 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { limitOffset._limit.value() -= limitOffset.actualSize(idTable.numRows() - offsetDelta); } + limitTimeCallback(limitTimer.msecs()); if (limitOffset._offset == 0) { co_yield std::move(idTable); } @@ -147,7 +155,8 @@ void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { break; } } - }(std::move(std::get(data_)).extractGenerator(), limitOffset); + }(std::move(std::get(data_)).extractGenerator(), limitOffset, + std::move(limitTimeCallback)); data_.emplace(std::move(generator)); } else { AD_FAIL(); @@ -288,6 +297,16 @@ void Result::setOnGeneratorFinished( std::get(data_).setOnGeneratorFinished(std::move(onGeneratorFinished)); } +// _____________________________________________________________________________ +void Result::setOnNextChunkComputed( + std::function onNextChunkComputed) { + using Gen = ad_utility::CacheableGenerator; + // This should only ever get called on the "wrapped" generator stored in the + // cache. 
+ AD_CONTRACT_CHECK(std::holds_alternative(data_)); + std::get(data_).setOnNextChunkComputed(std::move(onNextChunkComputed)); +} + Result Result::aggregateTable() const { using Gen = ad_utility::CacheableGenerator; AD_CONTRACT_CHECK(std::holds_alternative(data_)); @@ -315,12 +334,13 @@ Result Result::aggregateTable() const { } // _____________________________________________________________________________ -Result Result::createResultWithFallback(std::shared_ptr original, - std::function fallback) { +Result Result::createResultWithFallback( + std::shared_ptr original, std::function fallback, + std::function onIteration) { AD_CONTRACT_CHECK(!original->isDataEvaluated()); auto generator = [](std::shared_ptr sharedResult, - std::function fallback) - -> cppcoro::generator { + std::function fallback, + auto onIteration) -> cppcoro::generator { size_t index = 0; try { for (auto&& idTable : sharedResult->idTables()) { @@ -338,30 +358,47 @@ Result Result::createResultWithFallback(std::shared_ptr original, // If data is evaluated this means that this process is not deterministic // or that there's a wrong callback used here. 
AD_CORRECTNESS_CHECK(!freshResult.isDataEvaluated()); + auto start = std::chrono::steady_clock::now(); for (auto&& idTable : freshResult.idTables()) { + auto stop = std::chrono::steady_clock::now(); + if (onIteration) { + onIteration(std::chrono::duration_cast( + stop - start)); + } if (index > 0) { index--; continue; } co_yield idTable; + start = std::chrono::steady_clock::now(); + } + auto stop = std::chrono::steady_clock::now(); + if (onIteration) { + onIteration( + std::chrono::duration_cast(stop - start)); } }; - return Result{generator(std::move(original), std::move(fallback)), + return Result{generator(std::move(original), std::move(fallback), + std::move(onIteration)), original->sortedBy_, original->localVocab_}; } // _____________________________________________________________________________ Result Result::createResultAsMasterConsumer( - std::shared_ptr original) { + std::shared_ptr original, std::function onIteration) { using Gen = ad_utility::CacheableGenerator; AD_CONTRACT_CHECK(std::holds_alternative(original->data_)); - auto generator = [](auto original) -> cppcoro::generator { + auto generator = [](auto original, + auto onIteration) -> cppcoro::generator { using ad_utility::IteratorWrapper; auto& generator = std::get(original->data_); for (const IdTable& idTable : IteratorWrapper{generator, true}) { + if (onIteration) { + onIteration(); + } co_yield idTable; } }; - return Result{generator(std::move(original)), original->sortedBy_, - original->localVocab_}; + return Result{generator(std::move(original), std::move(onIteration)), + original->sortedBy_, original->localVocab_}; } diff --git a/src/engine/Result.h b/src/engine/Result.h index cac6f2fc89..7fd6174d51 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -172,7 +172,9 @@ class Result { // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those are still correct after performing this 
operation. - void applyLimitOffset(const LimitOffsetClause& limitOffset); + void applyLimitOffset( + const LimitOffsetClause& limitOffset, + std::function limitTimeCallback); void enforceLimitOffset(const LimitOffsetClause& limitOffset); @@ -193,11 +195,16 @@ class Result { void setOnGeneratorFinished(std::function onGeneratorFinished); + void setOnNextChunkComputed( + std::function onNextChunkComputed); + Result aggregateTable() const; - static Result createResultWithFallback(std::shared_ptr original, - std::function fallback); + static Result createResultWithFallback( + std::shared_ptr original, std::function fallback, + std::function onIteration); static Result createResultAsMasterConsumer( - std::shared_ptr original); + std::shared_ptr original, + std::function onIteration); }; diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 952ac58978..4e3819e280 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -5,6 +5,7 @@ #ifndef REUSABLEGENERATOR_H #define REUSABLEGENERATOR_H +#include #include #include #include @@ -38,6 +39,7 @@ class CacheableGenerator { // callback if we actually can shrink std::function onSizeChanged_{}; std::function onGeneratorFinished_{}; + std::function onNextChunkComputed_{}; public: explicit ComputationStorage(cppcoro::generator generator) @@ -75,13 +77,19 @@ class CacheableGenerator { return; } } - // TODO track processing time to update stats. 
+ auto start = std::chrono::steady_clock::now(); if (generatorIterator_.has_value()) { AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); ++generatorIterator_.value(); } else { generatorIterator_ = generator_.begin(); } + auto stop = std::chrono::steady_clock::now(); + if (onNextChunkComputed_) { + onNextChunkComputed_( + std::chrono::duration_cast(stop - + start)); + } if (generatorIterator_.value() != generator_.end()) { cachedValues_.emplace_back(std::move(*generatorIterator_.value())); if (onSizeChanged_) { @@ -130,6 +138,12 @@ class CacheableGenerator { onGeneratorFinished_ = std::move(onGeneratorFinished); } + void setOnNextChunkComputed( + std::function onNextChunkComputed) { + std::lock_guard lock{mutex_}; + onNextChunkComputed_ = std::move(onNextChunkComputed); + } + void forEachCachedValue( const std::invocable auto& function) const { std::shared_lock lock{mutex_}; @@ -252,6 +266,11 @@ class CacheableGenerator { void setOnGeneratorFinished(std::function onGeneratorFinished) { computationStorage_->setOnGeneratorFinished(std::move(onGeneratorFinished)); } + + void setOnNextChunkComputed( + std::function onNextChunkComputed) { + computationStorage_->setOnNextChunkComputed(std::move(onNextChunkComputed)); + } }; }; // namespace ad_utility From ff5a6ea6a7da6e36d7446445c615383a59af0fd8 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 02:42:02 +0200 Subject: [PATCH 028/133] Apply formatting --- src/engine/CountAvailablePredicates.cpp | 3 ++- src/engine/ExportQueryExecutionTrees.h | 4 ++-- src/engine/HasPredicateScan.h | 2 +- src/engine/TextIndexScanForEntity.cpp | 3 ++- src/engine/TextIndexScanForWord.cpp | 3 ++- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 78c1f6ae5a..052a9f9396 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -100,7 
+100,8 @@ size_t CountAvailablePredicates::getCostEstimate() { } // _____________________________________________________________________________ -Result CountAvailablePredicates::computeResult([[maybe_unused]] bool requestLaziness) { +Result CountAvailablePredicates::computeResult( + [[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(2); diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 46699171d6..8958370b57 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -51,8 +51,8 @@ class ExportQueryExecutionTrees { // results. static nlohmann::json computeResultAsJSON( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, - MediaType mediaType, CancellationHandle cancellationHandle); + const ad_utility::Timer& requestTimer, MediaType mediaType, + CancellationHandle cancellationHandle); // Convert the `id` to a human-readable string. The `index` is used to resolve // `Id`s with datatype `VocabIndex` or `TextRecordIndex`. 
The `localVocab` is diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index e1cd4e821e..8022bca543 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -106,7 +106,7 @@ class HasPredicateScan : public Operation { template Result computeSubqueryS(IdTable* result, - const CompactVectorOfStrings& patterns); + const CompactVectorOfStrings& patterns); private: Result computeResult([[maybe_unused]] bool requestLaziness) override; diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index 2d8e236bc9..80150ed29d 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -14,7 +14,8 @@ TextIndexScanForEntity::TextIndexScanForEntity( word_(std::move(word)) {} // _____________________________________________________________________________ -Result TextIndexScanForEntity::computeResult([[maybe_unused]] bool requestLaziness) { +Result TextIndexScanForEntity::computeResult( + [[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getEntityMentionsForWord( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 42860adfb2..5a7fa19425 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -13,7 +13,8 @@ TextIndexScanForWord::TextIndexScanForWord(QueryExecutionContext* qec, isPrefix_(word_.ends_with('*')) {} // _____________________________________________________________________________ -Result TextIndexScanForWord::computeResult([[maybe_unused]] bool requestLaziness) { +Result TextIndexScanForWord::computeResult( + [[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); From b974c7df3f9d72eeab9633fc315c24af279e72bb Mon Sep 17 00:00:00 2001 From: RobinTF 
<83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 02:47:59 +0200 Subject: [PATCH 029/133] Fix compilation on gcc 11 and gcc 12 --- src/util/CacheableGenerator.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 4e3819e280..757674d8fe 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -27,8 +27,11 @@ class CacheableGenerator { enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; + class Iterator; + class ComputationStorage { friend CacheableGenerator; + friend Iterator; mutable std::shared_mutex mutex_; std::condition_variable_any conditionVariable_; cppcoro::generator generator_; @@ -211,7 +214,7 @@ class CacheableGenerator { } friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return !it.storage_->lock()->isDone(it.currentIndex_); + return !it.storage()->isDone(it.currentIndex_); } friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { From 8b99020da7d2877167f80fdd89c4c8e3c3556970 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 03:23:14 +0200 Subject: [PATCH 030/133] Add correct visibility modifiers --- src/util/CacheableGenerator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 757674d8fe..6d0b760165 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -27,8 +27,10 @@ class CacheableGenerator { enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; + public: class Iterator; + private: class ComputationStorage { friend CacheableGenerator; friend Iterator; From 5f8ab65288dc4476473eca5f0c61ccf484a43348 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 03:34:59 +0200 Subject: [PATCH 031/133] Try fixing the compilation issue for real 
this time --- src/util/CacheableGenerator.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 6d0b760165..7094d01e90 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -27,13 +27,8 @@ class CacheableGenerator { enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; - public: - class Iterator; - - private: class ComputationStorage { friend CacheableGenerator; - friend Iterator; mutable std::shared_mutex mutex_; std::condition_variable_any conditionVariable_; cppcoro::generator generator_; @@ -119,12 +114,15 @@ class CacheableGenerator { return cachedValues_.at(index).value(); } + // Needs to be public in order to compile with gcc 11 & 12 + public: bool isDone(size_t index) noexcept { std::shared_lock lock{mutex_}; return index == cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } + private: void clearMaster() { std::unique_lock lock{mutex_}; AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::MASTER_DONE); From 3a95ef589afd325249cca4224658f6627579dbf7 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 18 May 2024 18:05:19 +0200 Subject: [PATCH 032/133] Try to fix compilation issue on macOS --- src/engine/Server.cpp | 4 ++-- src/engine/Server.h | 2 +- src/global/Constants.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index a710950aae..d53216d28e 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -645,7 +645,7 @@ boost::asio::awaitable Server::processQuery( } else if (containsParam("action", "binary_export")) { mediaType = MediaType::octetStream; } - std::optional maxSend = + std::optional maxSend = params.contains("send") ? 
std::optional{std::stoul(params.at("send"))} : std::nullopt; // Limit JSON requests by default @@ -827,7 +827,7 @@ Awaitable Server::computeInNewThread(Function function, net::awaitable> Server::parseAndPlan( const std::string& query, QueryExecutionContext& qec, SharedCancellationHandle handle, TimeLimit timeLimit, - std::optional maxSend) { + std::optional maxSend) { auto handleCopy = handle; // The usage of an `optional` here is required because of a limitation in diff --git a/src/engine/Server.h b/src/engine/Server.h index 0a56a712f0..a0c40be78e 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -184,7 +184,7 @@ class Server { net::awaitable> parseAndPlan( const std::string& query, QueryExecutionContext& qec, SharedCancellationHandle handle, TimeLimit timeLimit, - std::optional maxSend); + std::optional maxSend); /// Acquire the `CancellationHandle` for the given `QueryId`, start the /// watchdog and call `cancelAfterDeadline` to set the timeout after diff --git a/src/global/Constants.h b/src/global/Constants.h index c7b68d5735..06b0e4b6ee 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -23,7 +23,7 @@ static const ad_utility::MemorySize STXXL_DISK_SIZE_INDEX_BUILDER = 1_GB; static constexpr ad_utility::MemorySize DEFAULT_MEM_FOR_QUERIES = 4_GB; -static const size_t MAX_NOF_ROWS_IN_RESULT = 1'000'000; +constexpr uint64_t MAX_NOF_ROWS_IN_RESULT = 1'000'000; static const size_t MIN_WORD_PREFIX_SIZE = 4; static const char PREFIX_CHAR = '*'; From e2dc667170991ac4f40f1716a283472ab297d883 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 20 May 2024 21:40:10 +0200 Subject: [PATCH 033/133] Implement PoC lazy operation for index scan and filter operations --- src/engine/Filter.cpp | 72 +++++++++++++++++++++++++++------------- src/engine/Filter.h | 8 +++-- src/engine/IndexScan.cpp | 21 +++++++++++- src/engine/IndexScan.h | 4 ++- 4 files changed, 77 insertions(+), 28 deletions(-) diff --git 
a/src/engine/Filter.cpp b/src/engine/Filter.cpp index d69aef51c0..72054bacbc 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -43,41 +43,67 @@ string Filter::getDescriptor() const { } // _____________________________________________________________________________ -Result Filter::computeResult([[maybe_unused]] bool requestLaziness) { +Result Filter::computeResult(bool requestLaziness) { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; - std::shared_ptr subRes = _subtree->getResult(); + std::shared_ptr subRes = _subtree->getResult(requestLaziness); LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); - IdTable idTable{getExecutionContext()->getAllocator()}; - idTable.setNumColumns(subRes->idTable().numColumns()); - - size_t width = idTable.numColumns(); - CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, &idTable, *subRes); - LOG(DEBUG) << "Filter result computation done." << endl; - checkCancellation(); + if (subRes->isDataEvaluated()) { + sparqlExpression::EvaluationContext evaluationContext( + *getExecutionContext(), _subtree->getVariableColumns(), + subRes->idTable(), getExecutionContext()->getAllocator(), + subRes->localVocab(), cancellationHandle_); + + // TODO This should be a mandatory argument to the + // EvaluationContext constructor. + evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy(); + + size_t width = evaluationContext._inputTable.numColumns(); + IdTable result = CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, + evaluationContext); + LOG(DEBUG) << "Filter result computation done." 
<< endl; + checkCancellation(); + + return {std::move(result), resultSortedOn(), subRes->getSharedLocalVocab()}; + } + return {filterInChunks(subRes), resultSortedOn(), + subRes->getSharedLocalVocab()}; +} - return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +// _____________________________________________________________________________ +cppcoro::generator Filter::filterInChunks( + std::shared_ptr subRes) { + for (const IdTable& idTable : subRes->idTables()) { + sparqlExpression::EvaluationContext evaluationContext( + *getExecutionContext(), _subtree->getVariableColumns(), idTable, + getExecutionContext()->getAllocator(), subRes->localVocab(), + cancellationHandle_); + + // TODO This should be a mandatory argument to the + // EvaluationContext constructor. + evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy(); + + size_t width = evaluationContext._inputTable.numColumns(); + co_yield CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, + evaluationContext); + LOG(DEBUG) << "Filter result chunk done." << endl; + checkCancellation(); + } } // _____________________________________________________________________________ template -void Filter::computeFilterImpl(IdTable* outputIdTable, - const Result& inputResultTable) { - sparqlExpression::EvaluationContext evaluationContext( - *getExecutionContext(), _subtree->getVariableColumns(), - inputResultTable.idTable(), getExecutionContext()->getAllocator(), - inputResultTable.localVocab(), cancellationHandle_); - - // TODO This should be a mandatory argument to the EvaluationContext - // constructor. 
- evaluationContext._columnsByWhichResultIsSorted = inputResultTable.sortedBy(); +IdTable Filter::computeFilterImpl( + sparqlExpression::EvaluationContext& evaluationContext) { + IdTable idTable{getExecutionContext()->getAllocator()}; + idTable.setNumColumns(evaluationContext._inputTable.numColumns()); sparqlExpression::ExpressionResult expressionResult = _expression.getPimpl()->evaluate(&evaluationContext); - const auto input = inputResultTable.idTable().asStaticView(); - auto output = std::move(*outputIdTable).toStatic(); + const auto input = evaluationContext._inputTable.asStaticView(); + auto output = std::move(idTable).toStatic(); // Clang 17 seems to incorrectly deduce the type, so try to trick it std::remove_const_t& output2 = output; @@ -123,7 +149,7 @@ void Filter::computeFilterImpl(IdTable* outputIdTable, std::visit(visitor, std::move(expressionResult)); - *outputIdTable = std::move(output).toDynamic(); + return std::move(output).toDynamic(); } // _____________________________________________________________________________ diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 1b1119491a..03a28b2d63 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -58,9 +58,11 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - Result computeResult([[maybe_unused]] bool requestLaziness) override; + Result computeResult(bool requestLaziness) override; template - void computeFilterImpl(IdTable* outputIdTable, - const Result& inputResultTable); + IdTable computeFilterImpl( + sparqlExpression::EvaluationContext& evaluationContext); + + cppcoro::generator filterInChunks(std::shared_ptr subRes); }; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 0aec6a3e6a..9ef45afac0 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -122,9 +122,28 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { std::ranges::for_each(additionalVariables_, addCol); return variableToColumnMap; } + 
// _____________________________________________________________________________ -Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) { +cppcoro::generator IndexScan::scanInChunks() const { + auto metadata = getMetadataForScan(*this); + if (!metadata.has_value()) { + co_return; + } + auto blocksSpan = + CompressedRelationReader::getBlocksFromMetadata(metadata.value()); + std::vector blocks{blocksSpan.begin(), + blocksSpan.end()}; + for (IdTable& idTable : getLazyScan(*this, std::move(blocks))) { + co_yield std::move(idTable); + } +} + +// _____________________________________________________________________________ +Result IndexScan::computeResult(bool requestLaziness) { LOG(DEBUG) << "IndexScan result computation...\n"; + if (requestLaziness) { + return {scanInChunks(), resultSortedOn(), LocalVocab{}}; + } IdTable idTable{getExecutionContext()->getAllocator()}; using enum Permutation::Enum; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index c8aa89bba9..5191a1e55f 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -104,7 +104,7 @@ class IndexScan : public Operation { std::array getPermutedTriple() const; private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + Result computeResult(bool requestLaziness) override; vector getChildren() override { return {}; } @@ -116,6 +116,8 @@ class IndexScan : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + cppcoro::generator scanInChunks() const; + // Helper functions for the public `getLazyScanFor...` functions (see above). 
static Permutation::IdTableGenerator getLazyScan( const IndexScan& s, std::vector blocks); From 070494f0b806afcde75e2e82c404f82573a4dd34 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 20 May 2024 21:56:07 +0200 Subject: [PATCH 034/133] Fix double limit offset row --- src/engine/ExportQueryExecutionTrees.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index f88f86f273..d79df234e4 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -663,8 +663,6 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["runtimeInformation"]["meta"] = nlohmann::ordered_json( qet.getRootOperation()->getRuntimeInfoWholeQuery()); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); - runtimeInformation.addLimitOffsetRow( - query._limitOffset, std::chrono::milliseconds::zero(), false); runtimeInformation.addDetail("executed-implicitly-during-query-export", true); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); From f631c1368090dcf1fe6ef925d088e15a0e837bd9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 20 May 2024 22:06:40 +0200 Subject: [PATCH 035/133] Fix wrong assertion --- src/engine/Operation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index b4b084a5f8..2c424e3021 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -141,7 +141,7 @@ std::shared_ptr Operation::getResult( // This check doesn't make sense when the result has not been evaluated // yet, so it should be moved into the operations eventually. 
AD_EXPENSIVE_CHECK( - result.isDataEvaluated() || + !result.isDataEvaluated() || result.checkDefinedness(getExternallyVisibleVariableColumns())); // Make sure that the results that are written to the cache have the // correct runtimeInfo. The children of the runtime info are already set From c249628f817d404ecfcb6c0c16bdd70530452775 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 20 May 2024 22:07:14 +0200 Subject: [PATCH 036/133] Properly request lazy results when limit clause is present --- src/engine/ExportQueryExecutionTrees.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index d79df234e4..6b7e012e87 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -639,7 +639,8 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { - std::shared_ptr resultTable = qet.getResult(); + std::shared_ptr resultTable = + qet.getResult(query._limitOffset._limit.has_value()); resultTable->logResultSize(); auto timeResultComputation = requestTimer.msecs(); From ea7fd79d062c58596ab92bb20970e39573c31c5f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 21 May 2024 00:59:41 +0200 Subject: [PATCH 037/133] Fix bugs and segfaults --- src/engine/CartesianProductJoin.h | 5 ++- src/engine/IndexScan.h | 4 +-- src/engine/Operation.cpp | 11 ++++--- src/engine/Operation.h | 5 ++- src/engine/Result.cpp | 53 +++++++++++++++++-------------- src/engine/Result.h | 3 +- src/util/CacheableGenerator.h | 22 +++++++------ test/engine/ValuesForTesting.h | 4 ++- 8 files changed, 63 insertions(+), 44 deletions(-) diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 
872698f733..780b37d4eb 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -67,7 +67,10 @@ class CartesianProductJoin : public Operation { bool knownEmptyResult() override; // The Cartesian product join can efficiently evaluate a limited result. - [[nodiscard]] bool supportsLimit() const override { return true; } + [[nodiscard]] bool supportsLimit( + [[maybe_unused]] bool lazyResult) const override { + return true; + } protected: // Don't promise any sorting of the result. diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 5191a1e55f..515dc4feff 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -92,8 +92,8 @@ class IndexScan : public Operation { } // Currently only the full scans support a limit clause. - [[nodiscard]] bool supportsLimit() const override { - return getResultWidth() == 3; + [[nodiscard]] bool supportsLimit(bool lazyResult) const override { + return !lazyResult && getResultWidth() == 3; } Permutation::Enum permutation() const { return permutation_; } diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 2c424e3021..72e04beeb7 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -159,7 +159,7 @@ std::shared_ptr Operation::getResult( // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not // already perform it. An example for an operation that directly computes // the Limit is a full index scan with three variables. - if (!supportsLimit()) { + if (!supportsLimit(!result.isDataEvaluated())) { runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); result.applyLimitOffset(_limit, @@ -214,9 +214,10 @@ std::shared_ptr Operation::getResult( return nullptr; } - if (result._resultPointer->resultTable()->isDataEvaluated()) { - updateRuntimeInformationOnSuccess(result, timer.msecs()); - } + updateRuntimeInformationOnSuccess( + result, result._resultPointer->resultTable()->isDataEvaluated() + ? 
timer.msecs() + : result._resultPointer->runtimeInfo().totalTime_); if (result._resultPointer->resultTable()->isDataEvaluated()) { auto resultNumRows = @@ -330,7 +331,7 @@ void Operation::updateRuntimeInformationOnSuccess( // ____________________________________________________________________________________________________________________ void Operation::updateRuntimeInformationOnSuccess( - const ConcurrentLruCache ::ResultAndCacheStatus& resultAndCacheStatus, + const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { updateRuntimeInformationOnSuccess( *resultAndCacheStatus._resultPointer->resultTable(), diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 665c0fb3f2..b46ace7a92 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -103,7 +103,10 @@ class Operation { // True iff this operation directly implement a `OFFSET` and `LIMIT` clause on // its result. - [[nodiscard]] virtual bool supportsLimit() const { return false; } + [[nodiscard]] virtual bool supportsLimit( + [[maybe_unused]] bool lazyResult) const { + return false; + } public: virtual float getMultiplicity(size_t col) = 0; diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 31f161b55a..b407344bbf 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -37,20 +37,21 @@ auto Result::getMergedLocalVocab(const Result& resultTable1, LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } // _____________________________________________________________________________ -void Result::validateIdTable(const IdTable& idTable) const { - AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy_, [&idTable](size_t numCols) { +void Result::validateIdTable(const IdTable& idTable, + const std::vector& sortedBy) { + AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy, [&idTable](size_t numCols) { return numCols < idTable.numColumns(); })); - [[maybe_unused]] auto compareRowsByJoinColumns = [this](const auto& row1, - 
const auto& row2) { - for (size_t col : sortedBy_) { - if (row1[col] != row2[col]) { - return row1[col] < row2[col]; - } - } - return false; - }; + [[maybe_unused]] auto compareRowsByJoinColumns = + [&sortedBy](const auto& row1, const auto& row2) { + for (size_t col : sortedBy) { + if (row1[col] != row2[col]) { + return row1[col] < row2[col]; + } + } + return false; + }; AD_EXPENSIVE_CHECK(std::ranges::is_sorted(idTable, compareRowsByJoinColumns)); } @@ -61,7 +62,7 @@ Result::Result(IdTable idTable, std::vector sortedBy, sortedBy_{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); - validateIdTable(std::get(data_)); + validateIdTable(std::get(data_), sortedBy_); } // _____________________________________________________________________________ @@ -75,13 +76,13 @@ Result::Result(cppcoro::generator idTables, std::vector sortedBy, SharedLocalVocabWrapper localVocab) : data_{ad_utility::CacheableGenerator{ - [this, idTables = std::move( - idTables)]() mutable -> cppcoro::generator { + [](auto idTables, + auto sortedBy) mutable -> cppcoro::generator { for (IdTable& idTable : idTables) { - validateIdTable(idTable); + validateIdTable(idTable, sortedBy); co_yield std::move(idTable); } - }()}}, + }(std::move(idTables), sortedBy)}}, sortedBy_{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); @@ -123,6 +124,7 @@ void Result::applyLimitOffset( // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
+ AD_CONTRACT_CHECK(limitTimeCallback); AD_CONTRACT_CHECK( !std::holds_alternative>(data_)); using Gen = ad_utility::CacheableGenerator; @@ -140,12 +142,13 @@ void Result::applyLimitOffset( } for (auto&& idTable : original) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; + size_t originalSize = idTable.numRows(); modifyIdTable(idTable, limitOffset); - uint64_t offsetDelta = limitOffset.actualOffset(idTable.numRows()); + uint64_t offsetDelta = limitOffset.actualOffset(originalSize); limitOffset._offset -= offsetDelta; if (limitOffset._limit.has_value()) { limitOffset._limit.value() -= - limitOffset.actualSize(idTable.numRows() - offsetDelta); + limitOffset.actualSize(originalSize - offsetDelta); } limitTimeCallback(limitTimer.msecs()); if (limitOffset._offset == 0) { @@ -175,11 +178,13 @@ void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset) -> cppcoro::generator { + size_t elementCount = 0; for (auto&& idTable : original) { - AD_CONTRACT_CHECK(idTable.numRows() == - limitOffset.actualSize(idTable.numRows())); + elementCount += idTable.numRows(); + AD_CONTRACT_CHECK(elementCount <= limitOffset.actualSize(elementCount)); co_yield std::move(idTable); } + AD_CONTRACT_CHECK(elementCount == limitOffset.actualSize(elementCount)); }(std::move(std::get(data_)).extractGenerator(), limitOffset); data_.emplace(std::move(generator)); } else { @@ -378,9 +383,9 @@ Result Result::createResultWithFallback( std::chrono::duration_cast(stop - start)); } }; - return Result{generator(std::move(original), std::move(fallback), - std::move(onIteration)), - original->sortedBy_, original->localVocab_}; + return Result{ + generator(original, std::move(fallback), std::move(onIteration)), + original->sortedBy_, original->localVocab_}; } // _____________________________________________________________________________ @@ -399,6 +404,6 @@ Result 
Result::createResultAsMasterConsumer( co_yield idTable; } }; - return Result{generator(std::move(original), std::move(onIteration)), + return Result{generator(original, std::move(onIteration)), original->sortedBy_, original->localVocab_}; } diff --git a/src/engine/Result.h b/src/engine/Result.h index 7fd6174d51..cba50405ea 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -76,7 +76,8 @@ class Result { Result(cppcoro::generator idTables, std::vector sortedBy, LocalVocabPtr localVocab); - void validateIdTable(const IdTable& idTable) const; + static void validateIdTable(const IdTable& idTable, + const std::vector& sortedBy); public: // Construct from the given arguments (see above) and check the following diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 7094d01e90..ed886d1376 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -2,8 +2,8 @@ // Chair of Algorithms and Data Structures. // Author: Robin Textor-Falconi -#ifndef REUSABLEGENERATOR_H -#define REUSABLEGENERATOR_H +#ifndef CACHEABLEGENERATOR_H +#define CACHEABLEGENERATOR_H #include #include @@ -67,6 +67,10 @@ class CacheableGenerator { } return; } + if (generatorIterator_.has_value() && + generatorIterator_.value() == generator_.end()) { + return; + } if (masterState_ == MasterIteratorState::MASTER_STARTED) { if (!isMaster) { conditionVariable_.wait(lock, [this, index]() { @@ -118,7 +122,7 @@ class CacheableGenerator { public: bool isDone(size_t index) noexcept { std::shared_lock lock{mutex_}; - return index == cachedValues_.size() && generatorIterator_.has_value() && + return index >= cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } @@ -201,7 +205,7 @@ class CacheableGenerator { public: explicit Iterator(std::weak_ptr storage, bool isMaster) - : storage_{storage, + : storage_{std::move(storage), [isMaster](auto&& storage) { if (isMaster) { auto pointer = storage.lock(); @@ -214,7 
+218,7 @@ class CacheableGenerator { } friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return !it.storage()->isDone(it.currentIndex_); + return it.storage()->isDone(it.currentIndex_); } friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { @@ -238,14 +242,14 @@ class CacheableGenerator { // Need to provide post-increment operator to implement the 'Range' concept. void operator++(int) { (void)operator++(); } - Reference operator*() const noexcept { + Reference operator*() const { return storage()->getCachedValue(currentIndex_); } - Pointer operator->() const noexcept { return std::addressof(operator*()); } + Pointer operator->() const { return std::addressof(operator*()); } }; - Iterator begin(bool isMaster = false) const noexcept { + Iterator begin(bool isMaster = false) const { return Iterator{computationStorage_, isMaster}; } @@ -277,4 +281,4 @@ class CacheableGenerator { }; }; // namespace ad_utility -#endif // REUSABLEGENERATOR_H +#endif // CACHEABLEGENERATOR_H diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index a0f7e09e30..9e485e9d36 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -59,7 +59,9 @@ class ValuesForTesting : public Operation { } return {std::move(table), resultSortedOn(), localVocab_.clone()}; } - bool supportsLimit() const override { return supportsLimit_; } + bool supportsLimit([[maybe_unused]] bool lazyResult) const override { + return supportsLimit_; + } private: // ___________________________________________________________________________ From 1e9d0d60ab78b279d18d5157d3b72ba73a265895 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 21 May 2024 03:36:46 +0200 Subject: [PATCH 038/133] Formatting --- src/engine/Filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 03a28b2d63..107349eebd 100644 --- a/src/engine/Filter.h 
+++ b/src/engine/Filter.h @@ -64,5 +64,6 @@ class Filter : public Operation { IdTable computeFilterImpl( sparqlExpression::EvaluationContext& evaluationContext); - cppcoro::generator filterInChunks(std::shared_ptr subRes); + cppcoro::generator filterInChunks( + std::shared_ptr subRes); }; From dfb7ba6c85f5af970a4b0f289e9da6e2c35227ab Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 22 May 2024 00:56:00 +0200 Subject: [PATCH 039/133] Apply small refactoring change --- src/engine/Join.cpp | 5 +++-- src/engine/Operation.cpp | 7 ++++--- src/engine/Operation.h | 8 ++++++-- src/engine/QueryExecutionTree.h | 6 +++--- src/engine/Result.cpp | 5 ++--- src/engine/Result.h | 4 ++-- test/OperationTest.cpp | 8 ++++---- 7 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index e2b597d234..d726dcc6d5 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -123,8 +123,9 @@ Result Join::computeResult([[maybe_unused]] bool requestLaziness) { // cache". So effectively, this returns the result if it is small, contains // UNDEF values, or is contained in the cache, otherwise `nullptr`. return tree.getRootOperation()->getResult( - false, (isSmall || containsUndef) ? ComputationMode::CACHE_ONLY - : ComputationMode::FULL); + false, (isSmall || containsUndef) + ? 
ComputationMode::ONLY_IF_CACHED + : ComputationMode::FULLY_MATERIALIZED); }; auto leftResIfCached = getCachedOrSmallResult(*_left, _leftJoinCol); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 72e04beeb7..d76c980cc3 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -125,9 +125,10 @@ std::shared_ptr Operation::getResult( checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); - Result result = computeResult(computationMode == ComputationMode::LAZY); + Result result = + computeResult(computationMode == ComputationMode::LAZY_IF_SUPPORTED); actuallyComputed = true; - AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY || + AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY_IF_SUPPORTED || result.isDataEvaluated()); checkCancellation(); @@ -202,7 +203,7 @@ std::shared_ptr Operation::getResult( return CacheValue{std::move(result), runtimeInfo()}; }; - bool onlyReadFromCache = computationMode == ComputationMode::CACHE_ONLY; + bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; auto result = pinResult diff --git a/src/engine/Operation.h b/src/engine/Operation.h index b46ace7a92..1eae3c5503 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -21,7 +21,11 @@ // forward declaration needed to break dependencies class QueryExecutionTree; -enum class ComputationMode { FULL, CACHE_ONLY, LAZY }; +enum class ComputationMode { + FULLY_MATERIALIZED, + ONLY_IF_CACHED, + LAZY_IF_SUPPORTED +}; class Operation { using SharedCancellationHandle = ad_utility::SharedCancellationHandle; @@ -159,7 +163,7 @@ class Operation { */ std::shared_ptr getResult( bool isRoot = false, - ComputationMode computationMode = ComputationMode::FULL); + ComputationMode computationMode = ComputationMode::FULLY_MATERIALIZED); // Use the same cancellation handle for all children of an operation (= query // plan rooted at that operation). 
As soon as one child is aborted, the whole diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 64d94e0093..7f7871f494 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -52,9 +52,9 @@ class QueryExecutionTree { size_t getResultWidth() const { return rootOperation_->getResultWidth(); } std::shared_ptr getResult(bool requestLaziness = false) const { - return rootOperation_->getResult(isRoot(), requestLaziness - ? ComputationMode::LAZY - : ComputationMode::FULL); + return rootOperation_->getResult( + isRoot(), requestLaziness ? ComputationMode::LAZY_IF_SUPPORTED + : ComputationMode::FULLY_MATERIALIZED); } // A variable, its column index in the Id space result, and the `ResultType` diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index b407344bbf..679b3fd8fa 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -26,11 +26,10 @@ string Result::asDebugString() const { } // _____________________________________________________________________________ -auto Result::getMergedLocalVocab(const Result& resultTable1, - const Result& resultTable2) +auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) -> SharedLocalVocabWrapper { return getMergedLocalVocab( - std::array{std::cref(resultTable1), std::cref(resultTable2)}); + std::array{std::cref(result1), std::cref(result2)}); } // _____________________________________________________________________________ diff --git a/src/engine/Result.h b/src/engine/Result.h index cba50405ea..602a3e8028 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -139,8 +139,8 @@ class Result { // Like `getSharedLocalVocabFrom`, but takes more than one result and merges // all the corresponding local vocabs. 
- static SharedLocalVocabWrapper getMergedLocalVocab( - const Result& resultTable1, const Result& resultTable2); + static SharedLocalVocabWrapper getMergedLocalVocab(const Result& result1, + const Result& result2); // Overload for more than two `Results` template diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index d44d331332..3d9d3d9f0e 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -39,7 +39,7 @@ TEST(OperationTest, getResultOnlyCached) { NeutralElementOperation n{qec}; // The second `true` means "only read the result if it was cached". // We have just cleared the cache, and so this should return `nullptr`. - EXPECT_EQ(n.getResult(true, ComputationMode::CACHE_ONLY), nullptr); + EXPECT_EQ(n.getResult(true, ComputationMode::ONLY_IF_CACHED), nullptr); EXPECT_EQ(n.runtimeInfo().status_, RuntimeInformation::Status::notStarted); // Nothing has been stored in the cache by this call. EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 0); @@ -58,7 +58,7 @@ TEST(OperationTest, getResultOnlyCached) { // When we now request to only return the result if it is cached, we should // get exactly the same `shared_ptr` as with the previous call. NeutralElementOperation n3{qec}; - EXPECT_EQ(n3.getResult(true, ComputationMode::CACHE_ONLY), result); + EXPECT_EQ(n3.getResult(true, ComputationMode::ONLY_IF_CACHED), result); EXPECT_EQ(n3.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedNotPinned); @@ -67,7 +67,7 @@ TEST(OperationTest, getResultOnlyCached) { QueryExecutionContext qecCopy{*qec}; qecCopy._pinResult = true; NeutralElementOperation n4{&qecCopy}; - EXPECT_EQ(n4.getResult(true, ComputationMode::CACHE_ONLY), result); + EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); // The cache status is `cachedNotPinned` because we found the element cached // but not pinned (it does reflect the status BEFORE the operation). 
@@ -79,7 +79,7 @@ TEST(OperationTest, getResultOnlyCached) { // We have pinned the result, so requesting it again should return a pinned // result. qecCopy._pinResult = false; - EXPECT_EQ(n4.getResult(true, ComputationMode::CACHE_ONLY), result); + EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); EXPECT_EQ(n4.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedPinned); From 7d01e599ac774cc750c0ac136937f4cb3178342f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 22 May 2024 17:49:03 +0200 Subject: [PATCH 040/133] Correct wrong order of ternary statement --- src/engine/Join.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index d726dcc6d5..5e9caa0c04 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -123,9 +123,8 @@ Result Join::computeResult([[maybe_unused]] bool requestLaziness) { // cache". So effectively, this returns the result if it is small, contains // UNDEF values, or is contained in the cache, otherwise `nullptr`. return tree.getRootOperation()->getResult( - false, (isSmall || containsUndef) - ? ComputationMode::ONLY_IF_CACHED - : ComputationMode::FULLY_MATERIALIZED); + false, (isSmall || containsUndef) ? 
ComputationMode::FULLY_MATERIALIZED + : ComputationMode::ONLY_IF_CACHED); }; auto leftResIfCached = getCachedOrSmallResult(*_left, _leftJoinCol); From 5b2335a69180a4563fcf5740575d260c45dfc0a9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 22 May 2024 17:53:30 +0200 Subject: [PATCH 041/133] Add TODO --- src/engine/Join.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 5e9caa0c04..9cf9fdc52a 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -122,6 +122,7 @@ Result Join::computeResult([[maybe_unused]] bool requestLaziness) { // The third argument means "only get the result if it can be read from the // cache". So effectively, this returns the result if it is small, contains // UNDEF values, or is contained in the cache, otherwise `nullptr`. + // TODO Add a unit test that checks the correct conditions return tree.getRootOperation()->getResult( false, (isSmall || containsUndef) ? ComputationMode::FULLY_MATERIALIZED : ComputationMode::ONLY_IF_CACHED); From 9c445eaee06d60f6384f91bd68b77767ea2ea9c0 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 23 May 2024 17:24:45 +0200 Subject: [PATCH 042/133] Change how maxSend works --- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 91 +++++++++--------------- src/engine/ExportQueryExecutionTrees.h | 28 +++----- src/engine/QueryPlanner.cpp | 4 +- src/engine/Server.cpp | 29 +++++--- src/engine/Server.h | 3 +- src/global/Constants.h | 2 +- test/ExportQueryExecutionTreeTest.cpp | 6 +- 8 files changed, 71 insertions(+), 94 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 02710ef86f..2141b7bbe8 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -150,7 +150,7 @@ Result CartesianProductJoin::computeResult( // Get all child results (possibly with 
limit, see above). for (auto& child : childView()) { - if (limitIfPresent.has_value() && child.supportsLimit()) { + if (limitIfPresent.has_value()) { child.setLimit(limitIfPresent.value()); } subResults.push_back(child.getResult()); diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 2f3da341a7..48967cad0d 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -14,13 +14,8 @@ // __________________________________________________________________________ namespace { -// Return a range that contains the indices of the rows that have to be exported -// from the `idTable` given the `LimitOffsetClause`. It takes into account the -// LIMIT, the OFFSET, and the actual size of the `idTable` -auto getRowIndices(const LimitOffsetClause& limitOffset, - const IdTable& idTable) { - return std::views::iota(limitOffset.actualOffset(idTable.size()), - limitOffset.upperBound(idTable.size())); +auto getRowIndices(const IdTable& idTable) { + return std::views::iota(0u, idTable.size()); } } // namespace @@ -29,10 +24,10 @@ cppcoro::generator ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, + std::shared_ptr result, CancellationHandle cancellationHandle) { - for (size_t i : getRowIndices(limitAndOffset, res->idTable())) { - ConstructQueryExportContext context{i, *res, qet.getVariableColumns(), + for (size_t i : getRowIndices(result->idTable())) { + ConstructQueryExportContext context{i, *result, qet.getVariableColumns(), qet.getQec()->getIndex()}; using enum PositionInTriple; for (const auto& triple : constructTriples) { @@ -56,13 +51,11 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - 
LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { resultTable->logResultSize(); auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, resultTable, - std::move(cancellationHandle)); + qet, constructTriples, resultTable, std::move(cancellationHandle)); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -91,11 +84,9 @@ nlohmann::json ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - const LimitOffsetClause& limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle) { - auto generator = constructQueryResultToTriples(qet, constructTriples, - limitAndOffset, std::move(res), - std::move(cancellationHandle)); + std::shared_ptr res, CancellationHandle cancellationHandle) { + auto generator = constructQueryResultToTriples( + qet, constructTriples, std::move(res), std::move(cancellationHandle)); std::vector> jsonArray; for (auto& triple : generator) { jsonArray.push_back({std::move(triple.subject_), @@ -107,15 +98,15 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( // __________________________________________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( - const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, + const QueryExecutionTree& qet, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr resultTable, + std::shared_ptr result, CancellationHandle cancellationHandle) { - AD_CORRECTNESS_CHECK(resultTable != nullptr); - const IdTable& data = resultTable->idTable(); + AD_CORRECTNESS_CHECK(result != nullptr); + const IdTable& data = result->idTable(); nlohmann::json json = nlohmann::json::array(); - for (size_t rowIndex : getRowIndices(limitAndOffset, 
data)) { + for (size_t rowIndex : getRowIndices(data)) { // We need the explicit `array` constructor for the special case of zero // variables. json.push_back(nlohmann::json::array()); @@ -127,7 +118,7 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( } const auto& currentId = data(rowIndex, opt->columnIndex_); const auto& optionalStringAndXsdType = idToStringAndType( - qet.getQec()->getIndex(), currentId, resultTable->localVocab()); + qet.getQec()->getIndex(), currentId, result->localVocab()); if (!optionalStringAndXsdType.has_value()) { row.emplace_back(nullptr); continue; @@ -266,7 +257,6 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { using nlohmann::json; @@ -351,7 +341,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (size_t rowIndex : getRowIndices(limitAndOffset, idTable)) { + for (size_t rowIndex : getRowIndices(idTable)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. 
nlohmann::ordered_json binding; @@ -386,7 +376,6 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); @@ -395,7 +384,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( qet.selectedVariablesToColumnIndices(selectClause, true); return ExportQueryExecutionTrees::idTableToQLeverJSONArray( - qet, limitAndOffset, selectedColumnIndices, std::move(resultTable), + qet, selectedColumnIndices, std::move(resultTable), std::move(cancellationHandle)); } @@ -407,7 +396,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { + CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::turtle); @@ -427,7 +416,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( const auto& idTable = resultTable->idTable(); // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (size_t i : getRowIndices(idTable)) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -455,7 +444,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? 
RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (size_t i : getRowIndices(idTable)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -551,7 +540,6 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { using namespace std::string_view_literals; co_yield "\n" @@ -579,7 +567,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. - for (size_t i : getRowIndices(limitAndOffset, idTable)) { + for (size_t i : getRowIndices(idTable)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -604,7 +592,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -619,8 +607,7 @@ ExportQueryExecutionTrees::constructQueryResultToStream( : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? 
'\t' : ','; auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, resultTable, - std::move(cancellationHandle)); + qet, constructTriples, resultTable, std::move(cancellationHandle)); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -634,7 +621,7 @@ ExportQueryExecutionTrees::constructQueryResultToStream( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, + const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); @@ -657,23 +644,19 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["runtimeInformation"]["meta"] = nlohmann::ordered_json( qet.getRootOperation()->getRuntimeInfoWholeQuery()); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); - runtimeInformation.addLimitOffsetRow( - query._limitOffset, std::chrono::milliseconds::zero(), false); runtimeInformation.addDetail("executed-implicitly-during-query-export", true); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); { - auto limitAndOffset = query._limitOffset; - limitAndOffset._limit = std::min(limitAndOffset.limitOrDefault(), maxSend); j["res"] = query.hasSelectClause() ? 
ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( - qet, query.selectClause(), limitAndOffset, - std::move(resultTable), std::move(cancellationHandle)) + qet, query.selectClause(), std::move(resultTable), + std::move(cancellationHandle)) : ExportQueryExecutionTrees:: constructQueryResultBindingsToQLeverJSON( - qet, query.constructClause().triples_, limitAndOffset, + qet, query.constructClause().triples_, std::move(resultTable), std::move(cancellationHandle)); } j["resultsize"] = query.hasSelectClause() ? resultSize : j["res"].size(); @@ -690,15 +673,13 @@ ExportQueryExecutionTrees::computeResultAsStream( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, ad_utility::MediaType mediaType, CancellationHandle cancellationHandle) { auto compute = [&] { - auto limitAndOffset = parsedQuery._limitOffset; return parsedQuery.hasSelectClause() ? ExportQueryExecutionTrees::selectQueryResultToStream( - qet, parsedQuery.selectClause(), limitAndOffset, + qet, parsedQuery.selectClause(), std::move(cancellationHandle)) : ExportQueryExecutionTrees::constructQueryResultToStream< format>(qet, parsedQuery.constructClause().triples_, - limitAndOffset, qet.getResult(), - std::move(cancellationHandle)); + qet.getResult(), std::move(cancellationHandle)); }; using enum MediaType; @@ -717,7 +698,7 @@ ExportQueryExecutionTrees::computeResultAsStream( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( - const ParsedQuery& query, const QueryExecutionTree& qet, uint64_t maxSend, + const ParsedQuery& query, const QueryExecutionTree& qet, CancellationHandle cancellationHandle) { if (!query.hasSelectClause()) { AD_THROW( @@ -726,10 +707,8 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); nlohmann::json j; - auto limitAndOffset = query._limitOffset; - 
limitAndOffset._limit = std::min(limitAndOffset.limitOrDefault(), maxSend); j = ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( - qet, query.selectClause(), limitAndOffset, std::move(resultTable), + qet, query.selectClause(), std::move(resultTable), std::move(cancellationHandle)); return j; } @@ -737,17 +716,17 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( // _____________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::computeResultAsJSON( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, - ad_utility::MediaType mediaType, CancellationHandle cancellationHandle) { + const ad_utility::Timer& requestTimer, ad_utility::MediaType mediaType, + CancellationHandle cancellationHandle) { try { switch (mediaType) { case ad_utility::MediaType::qleverJson: return computeQueryResultAsQLeverJSON(parsedQuery, qet, requestTimer, - maxSend, + std::move(cancellationHandle)); case ad_utility::MediaType::sparqlJson: return computeSelectQueryResultAsSparqlJSON( - parsedQuery, qet, maxSend, std::move(cancellationHandle)); + parsedQuery, qet, std::move(cancellationHandle)); default: AD_FAIL(); } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 308c05a907..8958370b57 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -2,10 +2,7 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach -#include - #include "engine/QueryExecutionTree.h" -#include "parser/data/LimitOffsetClause.h" #include "util/CancellationHandle.h" #include "util/http/MediaTypes.h" #include "util/json.h" @@ -51,12 +48,11 @@ class ExportQueryExecutionTrees { // single JSON object that is fully materialized before the function returns. // The `requestTimer` is used to report timing statistics on the query. 
It // must have already run during the query planning to produce the expected - // results. If `maxSend` is smaller than the size of the query result, then - // only the first `maxSend` rows are returned. + // results. static nlohmann::json computeResultAsJSON( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, - MediaType mediaType, CancellationHandle cancellationHandle); + const ad_utility::Timer& requestTimer, MediaType mediaType, + CancellationHandle cancellationHandle); // Convert the `id` to a human-readable string. The `index` is used to resolve // `Id`s with datatype `VocabIndex` or `TextRecordIndex`. The `localVocab` is @@ -99,18 +95,17 @@ class ExportQueryExecutionTrees { // Similar to `queryToJSON`, but always returns the `QLeverJSON` format. static nlohmann::json computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, - const ad_utility::Timer& requestTimer, uint64_t maxSend, + const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle); // Similar to `queryToJSON`, but always returns the `SparqlJSON` format. static nlohmann::json computeSelectQueryResultAsSparqlJSON( - const ParsedQuery& query, const QueryExecutionTree& qet, uint64_t maxSend, + const ParsedQuery& query, const QueryExecutionTree& qet, CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -120,9 +115,6 @@ class ExportQueryExecutionTrees { * `computeQueryResultAsQLeverJSON` to obtain the "actual" query results * (without the meta data) * @param qet The `QueryExecutionTree` of the query. 
- * @param from the first entries of the idTable are skipped - * @param limitAndOffset at most entries are written, starting at - * * @param columns each pair of tells * us which columns are to be serialized in which order * @param resultTable The query result in the ID space. If it is `nullptr`, @@ -130,7 +122,7 @@ class ExportQueryExecutionTrees { * @return a 2D-Json array corresponding to the IdTable given the arguments */ static nlohmann::json idTableToQLeverJSONArray( - const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, + const QueryExecutionTree& qet, const QueryExecutionTree::ColumnIndicesAndTypes& columns, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -139,7 +131,6 @@ class ExportQueryExecutionTrees { static nlohmann::json constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - const LimitOffsetClause& limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. 
@@ -147,14 +138,12 @@ class ExportQueryExecutionTrees { constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle); + std::shared_ptr res, CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -163,7 +152,6 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -172,5 +160,5 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + CancellationHandle cancellationHandle); }; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c96347057b..3f401dc903 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -153,9 +153,7 @@ std::vector QueryPlanner::createExecutionTrees( vector& lastRow = plans.back(); for (auto& plan : lastRow) { - if (plan._qet->getRootOperation()->supportsLimit()) { - plan._qet->getRootOperation()->setLimit(pq._limitOffset); - } + plan._qet->getRootOperation()->setLimit(pq._limitOffset); } AD_CONTRACT_CHECK(!lastRow.empty()); diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index d0940213eb..d53216d28e 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -616,8 
+616,6 @@ boost::asio::awaitable Server::processQuery( const std::string& expected) { return params.contains(param) && params.at(param) == expected; }; - size_t maxSend = params.contains("send") ? std::stoul(params.at("send")) - : MAX_NOF_ROWS_IN_RESULT; const bool pinSubtrees = containsParam("pinsubtrees", "true"); const bool pinResult = containsParam("pinresult", "true"); LOG(INFO) << "Processing the following SPARQL query:" @@ -647,6 +645,14 @@ boost::asio::awaitable Server::processQuery( } else if (containsParam("action", "binary_export")) { mediaType = MediaType::octetStream; } + std::optional maxSend = + params.contains("send") ? std::optional{std::stoul(params.at("send"))} + : std::nullopt; + // Limit JSON requests by default + if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson || + mediaType == MediaType::qleverJson)) { + maxSend = MAX_NOF_ROWS_IN_RESULT; + } std::string_view acceptHeader = request.base()[http::field::accept]; @@ -683,8 +689,8 @@ boost::asio::awaitable Server::processQuery( auto [cancellationHandle, cancelTimeoutOnDestruction] = setupCancellationHandle(messageSender.getQueryId(), timeLimit); - plannedQuery = - co_await parseAndPlan(query, qec, cancellationHandle, timeLimit); + plannedQuery = co_await parseAndPlan(query, qec, cancellationHandle, + timeLimit, maxSend); AD_CORRECTNESS_CHECK(plannedQuery.has_value()); auto& qet = plannedQuery.value().queryExecutionTree_; qet.isRoot() = true; // allow pinning of the final result @@ -713,10 +719,10 @@ boost::asio::awaitable Server::processQuery( case sparqlJson: { // Normal case: JSON response auto responseString = co_await computeInNewThread( - [&plannedQuery, &qet, &requestTimer, maxSend, mediaType, + [&plannedQuery, &qet, &requestTimer, mediaType, &cancellationHandle] { return ExportQueryExecutionTrees::computeResultAsJSON( - plannedQuery.value().parsedQuery_, qet, requestTimer, maxSend, + plannedQuery.value().parsedQuery_, qet, requestTimer, mediaType.value(), 
cancellationHandle); }, cancellationHandle); @@ -820,7 +826,8 @@ Awaitable Server::computeInNewThread(Function function, // _____________________________________________________________________________ net::awaitable> Server::parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit) { + SharedCancellationHandle handle, TimeLimit timeLimit, + std::optional maxSend) { auto handleCopy = handle; // The usage of an `optional` here is required because of a limitation in @@ -830,9 +837,13 @@ net::awaitable> Server::parseAndPlan( // probably related to issues in GCC's coroutine implementation. return computeInNewThread( [&query, &qec, enablePatternTrick = enablePatternTrick_, - handle = std::move(handle), - timeLimit]() mutable -> std::optional { + handle = std::move(handle), timeLimit, + maxSend]() mutable -> std::optional { auto pq = SparqlParser::parseQuery(query); + if (maxSend.has_value()) { + pq._limitOffset._limit = + std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); + } handle->throwIfCancelled(); QueryPlanner qp(&qec, handle); qp.setEnablePatternTrick(enablePatternTrick); diff --git a/src/engine/Server.h b/src/engine/Server.h index 4c3deb0d67..a0c40be78e 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -183,7 +183,8 @@ class Server { /// technical reasons that are described in the definition of this function. 
net::awaitable> parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit); + SharedCancellationHandle handle, TimeLimit timeLimit, + std::optional maxSend); /// Acquire the `CancellationHandle` for the given `QueryId`, start the /// watchdog and call `cancelAfterDeadline` to set the timeout after diff --git a/src/global/Constants.h b/src/global/Constants.h index c7b68d5735..06b0e4b6ee 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -23,7 +23,7 @@ static const ad_utility::MemorySize STXXL_DISK_SIZE_INDEX_BUILDER = 1_GB; static constexpr ad_utility::MemorySize DEFAULT_MEM_FOR_QUERIES = 4_GB; -static const size_t MAX_NOF_ROWS_IN_RESULT = 1'000'000; +constexpr uint64_t MAX_NOF_ROWS_IN_RESULT = 1'000'000; static const size_t MIN_WORD_PREFIX_SIZE = 4; static const char PREFIX_CHAR = '*'; diff --git a/test/ExportQueryExecutionTreeTest.cpp b/test/ExportQueryExecutionTreeTest.cpp index 139c480916..0c993740a4 100644 --- a/test/ExportQueryExecutionTreeTest.cpp +++ b/test/ExportQueryExecutionTreeTest.cpp @@ -57,7 +57,7 @@ nlohmann::json runJSONQuery(const std::string& kg, const std::string& query, auto qet = qp.createExecutionTree(pq); ad_utility::Timer timer{ad_utility::Timer::Started}; return ExportQueryExecutionTrees::computeResultAsJSON( - pq, qet, timer, 200, mediaType, std::move(cancellationHandle)); + pq, qet, timer, mediaType, std::move(cancellationHandle)); } // A test case that tests the correct execution and exporting of a SELECT query @@ -862,8 +862,8 @@ TEST_P(JsonMediaTypesFixture, CancellationCancelsJson) { cancellationHandle->cancel(ad_utility::CancellationState::MANUAL); AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( ExportQueryExecutionTrees::computeResultAsJSON( - pq, qet, ad_utility::Timer{ad_utility::Timer::Started}, 200, - GetParam(), std::move(cancellationHandle)), + pq, qet, ad_utility::Timer{ad_utility::Timer::Started}, GetParam(), + std::move(cancellationHandle)), 
HasSubstr("Query export"), ad_utility::CancellationException); } INSTANTIATE_TEST_SUITE_P(JsonMediaTypes, JsonMediaTypesFixture, From b9ca4aa55d55de336c64f93868f594eeb54ffbcb Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 23 May 2024 19:25:04 +0200 Subject: [PATCH 043/133] Correct call order --- src/engine/Server.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index d53216d28e..eda328594b 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -645,6 +645,13 @@ boost::asio::awaitable Server::processQuery( } else if (containsParam("action", "binary_export")) { mediaType = MediaType::octetStream; } + + std::string_view acceptHeader = request.base()[http::field::accept]; + + if (!mediaType.has_value()) { + mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); + } + std::optional maxSend = params.contains("send") ? std::optional{std::stoul(params.at("send"))} : std::nullopt; @@ -654,12 +661,6 @@ boost::asio::awaitable Server::processQuery( maxSend = MAX_NOF_ROWS_IN_RESULT; } - std::string_view acceptHeader = request.base()[http::field::accept]; - - if (!mediaType.has_value()) { - mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); - } - if (!mediaType.has_value()) { co_return co_await send(createBadRequestResponse( absl::StrCat("Did not find any supported media type " From 43dddd0ef18c3e511d36bd5aff57a933d10d3baf Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 23 May 2024 19:25:04 +0200 Subject: [PATCH 044/133] Correct call order --- src/engine/Server.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index d53216d28e..eda328594b 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -645,6 +645,13 @@ boost::asio::awaitable Server::processQuery( } else if 
(containsParam("action", "binary_export")) { mediaType = MediaType::octetStream; } + + std::string_view acceptHeader = request.base()[http::field::accept]; + + if (!mediaType.has_value()) { + mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); + } + std::optional maxSend = params.contains("send") ? std::optional{std::stoul(params.at("send"))} : std::nullopt; @@ -654,12 +661,6 @@ boost::asio::awaitable Server::processQuery( maxSend = MAX_NOF_ROWS_IN_RESULT; } - std::string_view acceptHeader = request.base()[http::field::accept]; - - if (!mediaType.has_value()) { - mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); - } - if (!mediaType.has_value()) { co_return co_await send(createBadRequestResponse( absl::StrCat("Did not find any supported media type " From 4ac7892ba25972a33dade8a4182ff71c8faeaa8e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 28 May 2024 00:02:05 +0200 Subject: [PATCH 045/133] Rethink approach to apply limits and offset --- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 103 +++++++++++++---------- src/engine/ExportQueryExecutionTrees.h | 13 ++- src/engine/Operation.cpp | 17 +--- src/engine/QueryPlanner.cpp | 4 +- src/engine/Result.cpp | 22 +---- src/engine/Result.h | 13 +-- src/engine/Server.cpp | 22 ++--- src/engine/Server.h | 3 +- 9 files changed, 101 insertions(+), 98 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 2141b7bbe8..02710ef86f 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -150,7 +150,7 @@ Result CartesianProductJoin::computeResult( // Get all child results (possibly with limit, see above). 
for (auto& child : childView()) { - if (limitIfPresent.has_value()) { + if (limitIfPresent.has_value() && child.supportsLimit()) { child.setLimit(limitIfPresent.value()); } subResults.push_back(child.getResult()); diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 48967cad0d..740300fa28 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -14,8 +14,17 @@ // __________________________________________________________________________ namespace { -auto getRowIndices(const IdTable& idTable) { - return std::views::iota(0u, idTable.size()); +// Return a range that contains the indices of the rows that have to be exported +// from the `idTable` given the `LimitOffsetClause`. It takes into account the +// LIMIT, the OFFSET, and the actual size of the `idTable` +auto getRowIndices(const LimitOffsetClause& limitOffset, const Result& result) { + const IdTable& idTable = result.idTable(); + if (result.offsetAndLimitApplied()) { + // Don't apply offset twice if already applied + return std::views::iota(0ul, limitOffset.actualSize(idTable.size())); + } + return std::views::iota(limitOffset.actualOffset(idTable.size()), + limitOffset.upperBound(idTable.size())); } } // namespace @@ -24,10 +33,10 @@ cppcoro::generator ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - std::shared_ptr result, + LimitOffsetClause limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle) { - for (size_t i : getRowIndices(result->idTable())) { - ConstructQueryExportContext context{i, *result, qet.getVariableColumns(), + for (size_t i : getRowIndices(limitAndOffset, *res)) { + ConstructQueryExportContext context{i, *res, qet.getVariableColumns(), qet.getQec()->getIndex()}; using enum PositionInTriple; for (const auto& triple : constructTriples) { @@ -51,11 +60,13 @@ 
ad_utility::streams::stream_generator ExportQueryExecutionTrees:: constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, + LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { resultTable->logResultSize(); auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, resultTable, std::move(cancellationHandle)); + qet, constructTriples, limitAndOffset, resultTable, + std::move(cancellationHandle)); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -84,9 +95,11 @@ nlohmann::json ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - std::shared_ptr res, CancellationHandle cancellationHandle) { - auto generator = constructQueryResultToTriples( - qet, constructTriples, std::move(res), std::move(cancellationHandle)); + const LimitOffsetClause& limitAndOffset, std::shared_ptr res, + CancellationHandle cancellationHandle) { + auto generator = constructQueryResultToTriples(qet, constructTriples, + limitAndOffset, std::move(res), + std::move(cancellationHandle)); std::vector> jsonArray; for (auto& triple : generator) { jsonArray.push_back({std::move(triple.subject_), @@ -98,15 +111,15 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( // __________________________________________________________________________________________________________ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( - const QueryExecutionTree& qet, + const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr result, + std::shared_ptr resultTable, CancellationHandle cancellationHandle) { - AD_CORRECTNESS_CHECK(result != nullptr); - const IdTable& data = result->idTable(); + 
AD_CORRECTNESS_CHECK(resultTable != nullptr); + const IdTable& data = resultTable->idTable(); nlohmann::json json = nlohmann::json::array(); - for (size_t rowIndex : getRowIndices(data)) { + for (size_t rowIndex : getRowIndices(limitAndOffset, *resultTable)) { // We need the explicit `array` constructor for the special case of zero // variables. json.push_back(nlohmann::json::array()); @@ -118,7 +131,7 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( } const auto& currentId = data(rowIndex, opt->columnIndex_); const auto& optionalStringAndXsdType = idToStringAndType( - qet.getQec()->getIndex(), currentId, result->localVocab()); + qet.getQec()->getIndex(), currentId, resultTable->localVocab()); if (!optionalStringAndXsdType.has_value()) { row.emplace_back(nullptr); continue; @@ -257,6 +270,7 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, + const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { using nlohmann::json; @@ -341,7 +355,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (size_t rowIndex : getRowIndices(idTable)) { + for (size_t rowIndex : getRowIndices(limitAndOffset, *resultTable)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. 
nlohmann::ordered_json binding; @@ -376,6 +390,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, + const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(resultTable != nullptr); @@ -383,9 +398,9 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( QueryExecutionTree::ColumnIndicesAndTypes selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); - return ExportQueryExecutionTrees::idTableToQLeverJSONArray( - qet, selectedColumnIndices, std::move(resultTable), - std::move(cancellationHandle)); + return idTableToQLeverJSONArray(qet, limitAndOffset, selectedColumnIndices, + std::move(resultTable), + std::move(cancellationHandle)); } using parsedQuery::SelectClause; @@ -396,7 +411,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - CancellationHandle cancellationHandle) { + LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::turtle); @@ -416,7 +431,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( const auto& idTable = resultTable->idTable(); // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (size_t i : getRowIndices(idTable)) { + for (size_t i : getRowIndices(limitAndOffset, *resultTable)) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -444,7 +459,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& 
escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (size_t i : getRowIndices(idTable)) { + for (size_t i : getRowIndices(limitAndOffset, *resultTable)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -540,6 +555,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, + LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle) { using namespace std::string_view_literals; co_yield "\n" @@ -567,7 +583,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. - for (size_t i : getRowIndices(idTable)) { + for (size_t i : getRowIndices(limitAndOffset, *resultTable)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -592,7 +608,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - std::shared_ptr resultTable, + LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -606,8 +622,9 @@ ExportQueryExecutionTrees::constructQueryResultToStream( ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? 
'\t' : ','; - auto generator = ExportQueryExecutionTrees::constructQueryResultToTriples( - qet, constructTriples, resultTable, std::move(cancellationHandle)); + auto generator = + constructQueryResultToTriples(qet, constructTriples, limitAndOffset, + resultTable, std::move(cancellationHandle)); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -623,11 +640,11 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { - std::shared_ptr resultTable = qet.getResult(); - resultTable->logResultSize(); + std::shared_ptr result = qet.getResult(); + result->logResultSize(); auto timeResultComputation = requestTimer.msecs(); - size_t resultSize = resultTable->idTable().size(); + size_t resultSize = result->idTable().size(); nlohmann::json j; @@ -644,6 +661,8 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["runtimeInformation"]["meta"] = nlohmann::ordered_json( qet.getRootOperation()->getRuntimeInfoWholeQuery()); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); + runtimeInformation.addLimitOffsetRow( + query._limitOffset, std::chrono::milliseconds::zero(), false); runtimeInformation.addDetail("executed-implicitly-during-query-export", true); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); @@ -651,13 +670,12 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( { j["res"] = query.hasSelectClause() - ? ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( - qet, query.selectClause(), std::move(resultTable), - std::move(cancellationHandle)) - : ExportQueryExecutionTrees:: - constructQueryResultBindingsToQLeverJSON( - qet, query.constructClause().triples_, - std::move(resultTable), std::move(cancellationHandle)); + ? 
selectQueryResultBindingsToQLeverJSON( + qet, query.selectClause(), query._limitOffset, + std::move(result), std::move(cancellationHandle)) + : constructQueryResultBindingsToQLeverJSON( + qet, query.constructClause().triples_, query._limitOffset, + std::move(result), std::move(cancellationHandle)); } j["resultsize"] = query.hasSelectClause() ? resultSize : j["res"].size(); j["time"]["total"] = std::to_string(requestTimer.msecs().count()) + "ms"; @@ -673,13 +691,15 @@ ExportQueryExecutionTrees::computeResultAsStream( const ParsedQuery& parsedQuery, const QueryExecutionTree& qet, ad_utility::MediaType mediaType, CancellationHandle cancellationHandle) { auto compute = [&] { + auto limitAndOffset = parsedQuery._limitOffset; return parsedQuery.hasSelectClause() ? ExportQueryExecutionTrees::selectQueryResultToStream( - qet, parsedQuery.selectClause(), + qet, parsedQuery.selectClause(), limitAndOffset, std::move(cancellationHandle)) : ExportQueryExecutionTrees::constructQueryResultToStream< format>(qet, parsedQuery.constructClause().triples_, - qet.getResult(), std::move(cancellationHandle)); + limitAndOffset, qet.getResult(), + std::move(cancellationHandle)); }; using enum MediaType; @@ -706,11 +726,9 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( } std::shared_ptr resultTable = qet.getResult(); resultTable->logResultSize(); - nlohmann::json j; - j = ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( - qet, query.selectClause(), std::move(resultTable), + return selectQueryResultToSparqlJSON( + qet, query.selectClause(), query._limitOffset, std::move(resultTable), std::move(cancellationHandle)); - return j; } // _____________________________________________________________________________ @@ -722,7 +740,6 @@ nlohmann::json ExportQueryExecutionTrees::computeResultAsJSON( switch (mediaType) { case ad_utility::MediaType::qleverJson: return computeQueryResultAsQLeverJSON(parsedQuery, qet, requestTimer, - 
std::move(cancellationHandle)); case ad_utility::MediaType::sparqlJson: return computeSelectQueryResultAsSparqlJSON( diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 8958370b57..dff5ce9288 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -106,6 +106,7 @@ class ExportQueryExecutionTrees { static nlohmann::json selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, + const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -115,6 +116,8 @@ class ExportQueryExecutionTrees { * `computeQueryResultAsQLeverJSON` to obtain the "actual" query results * (without the meta data) * @param qet The `QueryExecutionTree` of the query. + * @param limitAndOffset at most entries are written, starting at + * * @param columns each pair of tells * us which columns are to be serialized in which order * @param resultTable The query result in the ID space. If it is `nullptr`, @@ -122,7 +125,7 @@ class ExportQueryExecutionTrees { * @return a 2D-Json array corresponding to the IdTable given the arguments */ static nlohmann::json idTableToQLeverJSONArray( - const QueryExecutionTree& qet, + const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -131,6 +134,7 @@ class ExportQueryExecutionTrees { static nlohmann::json constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, + const LimitOffsetClause& limitAndOffset, std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. 
@@ -138,12 +142,14 @@ class ExportQueryExecutionTrees { constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - std::shared_ptr res, CancellationHandle cancellationHandle); + LimitOffsetClause limitAndOffset, std::shared_ptr res, + CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, + const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -152,6 +158,7 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, + LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, CancellationHandle cancellationHandle); @@ -160,5 +167,5 @@ class ExportQueryExecutionTrees { static ad_utility::streams::stream_generator selectQueryResultToStream( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, - CancellationHandle cancellationHandle); + LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); }; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 6b795a5e5f..bc878153f7 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -144,19 +144,10 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnSuccess(result, ad_utility::CacheStatus::computed, timer.msecs(), std::nullopt); - // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not - // already perform it. An example for an operation that directly computes - // the Limit is a full index scan with three variables. 
- if (!supportsLimit()) { - ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - // Note: both of the following calls have no effect and negligible - // runtime if neither a LIMIT nor an OFFSET were specified. - result.applyLimitOffset(_limit); - runtimeInfo().addLimitOffsetRow(_limit, limitTimer.msecs(), true); - } else { - AD_CONTRACT_CHECK(result.idTable().numRows() == - _limit.actualSize(result.idTable().numRows())); - } + // This will hold true even when supportsLimit() returns false, + // because in this case `_limit` won't have been set on this operation. + AD_CONTRACT_CHECK(result.idTable().numRows() == + _limit.actualSize(result.idTable().numRows())); return CacheValue{std::move(result), runtimeInfo()}; }; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 3f401dc903..c96347057b 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -153,7 +153,9 @@ std::vector QueryPlanner::createExecutionTrees( vector& lastRow = plans.back(); for (auto& plan : lastRow) { - plan._qet->getRootOperation()->setLimit(pq._limitOffset); + if (plan._qet->getRootOperation()->supportsLimit()) { + plan._qet->getRootOperation()->setLimit(pq._limitOffset); + } } AD_CONTRACT_CHECK(!lastRow.empty()); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index ab80343d65..31401492b8 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -62,25 +62,6 @@ Result::Result(IdTable idTable, std::vector sortedBy, : Result(std::move(idTable), std::move(sortedBy), SharedLocalVocabWrapper{std::move(localVocab)}) {} -// _____________________________________________________________________________ -void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { - // Apply the OFFSET clause. If the offset is `0` or the offset is larger - // than the size of the `IdTable`, then this has no effect and runtime - // `O(1)` (see the docs for `std::shift_left`). 
- std::ranges::for_each( - idTable_.getColumns(), - [offset = limitOffset.actualOffset(idTable_.numRows()), - upperBound = - limitOffset.upperBound(idTable_.numRows())](std::span column) { - std::shift_left(column.begin(), column.begin() + upperBound, offset); - }); - // Resize the `IdTable` if necessary. - size_t targetSize = limitOffset.actualSize(idTable_.numRows()); - AD_CORRECTNESS_CHECK(targetSize <= idTable_.numRows()); - idTable_.resize(targetSize); - idTable_.shrinkToFit(); -} - // _____________________________________________________________________________ auto Result::getOrComputeDatatypeCountsPerColumn() -> const DatatypeCountsPerColumn& { @@ -119,3 +100,6 @@ void Result::logResultSize() const { LOG(INFO) << "Result has size " << idTable().size() << " x " << idTable().numColumns() << std::endl; } + +// _____________________________________________________________________________ +bool Result::offsetAndLimitApplied() const { return offsetAndLimitApplied_; } diff --git a/src/engine/Result.h b/src/engine/Result.h index 0ccc57ca61..966fc2bc1a 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -31,6 +31,11 @@ class Result { // The local vocabulary of the result. LocalVocabPtr localVocab_ = std::make_shared(); + // Currently unused flag to indicate if the operation creating this result + // already applied an offset and limit provided by the limit, so a consumer of + // this result know if it needs to skips those entries. + bool offsetAndLimitApplied_ = false; + // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those remain valid after calling non-const function like @@ -149,12 +154,6 @@ class Result { // The first rows of the result and its total size (for debugging). string asDebugString() const; - // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. 
- // Note: If additional members and invariants are added to the class (for - // example information about the datatypes in each column) make sure that - // those are still correct after performing this operation. - void applyLimitOffset(const LimitOffsetClause& limitOffset); - // Get the information, which columns stores how many entries of each // datatype. This information is computed on the first call to this function // `O(num-entries-in-table)` and then cached for subsequent usages. @@ -165,4 +164,6 @@ class Result { // undefined values in the `_idTable` of this result. Return `true` iff the // check is succesful. bool checkDefinedness(const VariableToColumnMap& varColMap); + + bool offsetAndLimitApplied() const; }; diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index eda328594b..728b7396e7 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -690,8 +690,8 @@ boost::asio::awaitable Server::processQuery( auto [cancellationHandle, cancelTimeoutOnDestruction] = setupCancellationHandle(messageSender.getQueryId(), timeLimit); - plannedQuery = co_await parseAndPlan(query, qec, cancellationHandle, - timeLimit, maxSend); + plannedQuery = + co_await parseAndPlan(query, qec, cancellationHandle, timeLimit); AD_CORRECTNESS_CHECK(plannedQuery.has_value()); auto& qet = plannedQuery.value().queryExecutionTree_; qet.isRoot() = true; // allow pinning of the final result @@ -703,6 +703,13 @@ boost::asio::awaitable Server::processQuery( << " ms" << std::endl; LOG(TRACE) << qet.getCacheKey() << std::endl; + // Apply stricter limit for export if present + if (maxSend.has_value()) { + auto& pq = plannedQuery.value().parsedQuery_; + pq._limitOffset._limit = + std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); + } + // This actually processes the query and sends the result in the requested // format. 
switch (mediaType.value()) { @@ -827,8 +834,7 @@ Awaitable Server::computeInNewThread(Function function, // _____________________________________________________________________________ net::awaitable> Server::parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit, - std::optional maxSend) { + SharedCancellationHandle handle, TimeLimit timeLimit) { auto handleCopy = handle; // The usage of an `optional` here is required because of a limitation in @@ -838,13 +844,9 @@ net::awaitable> Server::parseAndPlan( // probably related to issues in GCC's coroutine implementation. return computeInNewThread( [&query, &qec, enablePatternTrick = enablePatternTrick_, - handle = std::move(handle), timeLimit, - maxSend]() mutable -> std::optional { + handle = std::move(handle), + timeLimit]() mutable -> std::optional { auto pq = SparqlParser::parseQuery(query); - if (maxSend.has_value()) { - pq._limitOffset._limit = - std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); - } handle->throwIfCancelled(); QueryPlanner qp(&qec, handle); qp.setEnablePatternTrick(enablePatternTrick); diff --git a/src/engine/Server.h b/src/engine/Server.h index a0c40be78e..4c3deb0d67 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -183,8 +183,7 @@ class Server { /// technical reasons that are described in the definition of this function. 
net::awaitable> parseAndPlan( const std::string& query, QueryExecutionContext& qec, - SharedCancellationHandle handle, TimeLimit timeLimit, - std::optional maxSend); + SharedCancellationHandle handle, TimeLimit timeLimit); /// Acquire the `CancellationHandle` for the given `QueryId`, start the /// watchdog and call `cancelAfterDeadline` to set the timeout after From ef17e67829b6ad8e375fdbab092ff818fe6dc310 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 28 May 2024 00:18:35 +0200 Subject: [PATCH 046/133] Add back headers --- src/engine/ExportQueryExecutionTrees.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index dff5ce9288..efb3f893d7 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -2,13 +2,14 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach +#pragma once + #include "engine/QueryExecutionTree.h" +#include "parser/data/LimitOffsetClause.h" #include "util/CancellationHandle.h" #include "util/http/MediaTypes.h" #include "util/json.h" -#pragma once - // This class contains all the functionality to convert a query that has already // been parsed (by the SPARQL parser) and planned (by the query planner) into // a serialized result. 
In particular, it creates TSV, CSV, Turtle, JSON (SPARQL From aabb81b7f5b005b24d6bacf457ea06d062f9c52c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 1 Jun 2024 16:53:24 +0200 Subject: [PATCH 047/133] Add back result limiter for subqueries --- src/engine/ExportQueryExecutionTrees.cpp | 4 ---- src/engine/Operation.cpp | 22 ++++++++++++++++++---- src/engine/Operation.h | 4 ++-- src/engine/Result.cpp | 22 +++++++++++++++++++--- src/engine/Result.h | 13 ++++++------- src/engine/Server.cpp | 8 ++++++++ 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 740300fa28..b17dc9f111 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -19,10 +19,6 @@ namespace { // LIMIT, the OFFSET, and the actual size of the `idTable` auto getRowIndices(const LimitOffsetClause& limitOffset, const Result& result) { const IdTable& idTable = result.idTable(); - if (result.offsetAndLimitApplied()) { - // Don't apply offset twice if already applied - return std::views::iota(0ul, limitOffset.actualSize(idTable.size())); - } return std::views::iota(limitOffset.actualOffset(idTable.size()), limitOffset.upperBound(idTable.size())); } diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index bc878153f7..01b4f38461 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -144,10 +144,24 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnSuccess(result, ad_utility::CacheStatus::computed, timer.msecs(), std::nullopt); - // This will hold true even when supportsLimit() returns false, - // because in this case `_limit` won't have been set on this operation. - AD_CONTRACT_CHECK(result.idTable().numRows() == - _limit.actualSize(result.idTable().numRows())); + // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not + // already perform it. 
An example for an operation that directly computes + // the Limit is a full index scan with three variables. Note that the + // `QueryPlanner` does currently only set the limit for operations that + // support it natively, except for operations in subqueries. This means + // that a lot of the time the limit is only artificially applied during + // export, allowing the cache to re-use the same operation for different + // limits and offsets. + if (!supportsLimit()) { + ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; + // Note: both of the following calls have no effect and negligible + // runtime if neither a LIMIT nor an OFFSET were specified. + result.applyLimitOffset(_limit); + runtimeInfo().addLimitOffsetRow(_limit, limitTimer.msecs(), true); + } else { + AD_CONTRACT_CHECK(result.idTable().numRows() == + _limit.actualSize(result.idTable().numRows())); + } return CacheValue{std::move(result), runtimeInfo()}; }; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index e62b063fd5..02f94dc155 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -209,6 +209,8 @@ class Operation { return computeResult(requestLaziness); } + const auto& getLimit() const { return _limit; } + protected: // The QueryExecutionContext for this particular element. // No ownership. 
@@ -220,8 +222,6 @@ class Operation { */ [[nodiscard]] virtual vector resultSortedOn() const = 0; - const auto& getLimit() const { return _limit; } - /// interface to the generated warnings of this operation std::vector& getWarnings() { return _warnings; } [[nodiscard]] const std::vector& getWarnings() const { diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 31401492b8..ab80343d65 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -62,6 +62,25 @@ Result::Result(IdTable idTable, std::vector sortedBy, : Result(std::move(idTable), std::move(sortedBy), SharedLocalVocabWrapper{std::move(localVocab)}) {} +// _____________________________________________________________________________ +void Result::applyLimitOffset(const LimitOffsetClause& limitOffset) { + // Apply the OFFSET clause. If the offset is `0` or the offset is larger + // than the size of the `IdTable`, then this has no effect and runtime + // `O(1)` (see the docs for `std::shift_left`). + std::ranges::for_each( + idTable_.getColumns(), + [offset = limitOffset.actualOffset(idTable_.numRows()), + upperBound = + limitOffset.upperBound(idTable_.numRows())](std::span column) { + std::shift_left(column.begin(), column.begin() + upperBound, offset); + }); + // Resize the `IdTable` if necessary. 
+ size_t targetSize = limitOffset.actualSize(idTable_.numRows()); + AD_CORRECTNESS_CHECK(targetSize <= idTable_.numRows()); + idTable_.resize(targetSize); + idTable_.shrinkToFit(); +} + // _____________________________________________________________________________ auto Result::getOrComputeDatatypeCountsPerColumn() -> const DatatypeCountsPerColumn& { @@ -100,6 +119,3 @@ void Result::logResultSize() const { LOG(INFO) << "Result has size " << idTable().size() << " x " << idTable().numColumns() << std::endl; } - -// _____________________________________________________________________________ -bool Result::offsetAndLimitApplied() const { return offsetAndLimitApplied_; } diff --git a/src/engine/Result.h b/src/engine/Result.h index 966fc2bc1a..0ccc57ca61 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -31,11 +31,6 @@ class Result { // The local vocabulary of the result. LocalVocabPtr localVocab_ = std::make_shared(); - // Currently unused flag to indicate if the operation creating this result - // already applied an offset and limit provided by the limit, so a consumer of - // this result know if it needs to skips those entries. - bool offsetAndLimitApplied_ = false; - // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those remain valid after calling non-const function like @@ -154,6 +149,12 @@ class Result { // The first rows of the result and its total size (for debugging). string asDebugString() const; + // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. + // Note: If additional members and invariants are added to the class (for + // example information about the datatypes in each column) make sure that + // those are still correct after performing this operation. + void applyLimitOffset(const LimitOffsetClause& limitOffset); + // Get the information, which columns stores how many entries of each // datatype. 
This information is computed on the first call to this function // `O(num-entries-in-table)` and then cached for subsequent usages. @@ -164,6 +165,4 @@ class Result { // undefined values in the `_idTable` of this result. Return `true` iff the // check is succesful. bool checkDefinedness(const VariableToColumnMap& varColMap); - - bool offsetAndLimitApplied() const; }; diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index 728b7396e7..6be568d418 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -709,6 +709,14 @@ boost::asio::awaitable Server::processQuery( pq._limitOffset._limit = std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); } + // Make sure we don't underflow here + AD_CORRECTNESS_CHECK( + plannedQuery.value().parsedQuery_._limitOffset._offset >= + qet.getRootOperation()->getLimit()._offset); + // Don't apply offset twice, if the offset was not applied to the operation + // then the exporter can safely apply it during export. + plannedQuery.value().parsedQuery_._limitOffset._offset -= + qet.getRootOperation()->getLimit()._offset; // This actually processes the query and sends the result in the requested // format. 
From 66a38b410106d82d9e999c33ad9516d21ebe5c2c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 1 Jun 2024 17:49:25 +0200 Subject: [PATCH 048/133] Try to fix subtle bug with runtime information detail --- src/engine/ExportQueryExecutionTrees.cpp | 1 - src/engine/RuntimeInformation.cpp | 1 + test/RuntimeInformationTest.cpp | 14 ++++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index b17dc9f111..955c5a2151 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -659,7 +659,6 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); runtimeInformation.addLimitOffsetRow( query._limitOffset, std::chrono::milliseconds::zero(), false); - runtimeInformation.addDetail("executed-implicitly-during-query-export", true); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); diff --git a/src/engine/RuntimeInformation.cpp b/src/engine/RuntimeInformation.cpp index 840e25faed..f4d2e898ae 100644 --- a/src/engine/RuntimeInformation.cpp +++ b/src/engine/RuntimeInformation.cpp @@ -236,6 +236,7 @@ void RuntimeInformation::addLimitOffsetRow(const LimitOffsetClause& l, totalTime_ += timeForLimit; actualOperation->addDetail("not-written-to-cache-because-child-of-limit", fullResultIsNotCached); + addDetail("executed-implicitly-during-query-export", !fullResultIsNotCached); sizeEstimate_ = l.actualSize(sizeEstimate_); // Update the descriptor. 
diff --git a/test/RuntimeInformationTest.cpp b/test/RuntimeInformationTest.cpp index ea6254ae78..885ca3066c 100644 --- a/test/RuntimeInformationTest.cpp +++ b/test/RuntimeInformationTest.cpp @@ -16,6 +16,18 @@ TEST(RuntimeInformation, addLimitOffsetRow) { rti.totalTime_ = 4ms; rti.sizeEstimate_ = 34; + rti.addLimitOffsetRow(LimitOffsetClause{}, 5ms, true); + EXPECT_FALSE( + rti.details_.contains("not-written-to-cache-because-child-of-limit")); + EXPECT_FALSE( + rti.details_.contains("executed-implicitly-during-query-export")); + + rti.addLimitOffsetRow(LimitOffsetClause{}, 5ms, false); + EXPECT_FALSE( + rti.details_.contains("not-written-to-cache-because-child-of-limit")); + EXPECT_FALSE( + rti.details_.contains("executed-implicitly-during-query-export")); + rti.addLimitOffsetRow(LimitOffsetClause{23, 1, 4}, 20ms, true); EXPECT_EQ(rti.descriptor_, "LIMIT 23 OFFSET 4"); EXPECT_EQ(rti.totalTime_, 24ms); @@ -27,10 +39,12 @@ TEST(RuntimeInformation, addLimitOffsetRow) { EXPECT_EQ(child.totalTime_, 4ms); EXPECT_EQ(child.getOperationTime(), 4ms); EXPECT_TRUE(child.details_.at("not-written-to-cache-because-child-of-limit")); + EXPECT_FALSE(rti.details_.at("executed-implicitly-during-query-export")); rti.addLimitOffsetRow(LimitOffsetClause{std::nullopt, 1, 17}, 15ms, false); EXPECT_FALSE(rti.children_.at(0)->details_.at( "not-written-to-cache-because-child-of-limit")); + EXPECT_TRUE(rti.details_.at("executed-implicitly-during-query-export")); EXPECT_EQ(rti.descriptor_, "OFFSET 17"); rti.addLimitOffsetRow(LimitOffsetClause{42, 1, 0}, 15ms, true); From 9f17e07ac0bd86bd44b7706a0b237c9d937268c1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 5 Jun 2024 18:30:06 +0200 Subject: [PATCH 049/133] Add back comment --- src/util/ConcurrentCache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 7b46e6e528..8a930d3dc9 100644 --- a/src/util/ConcurrentCache.h +++ 
b/src/util/ConcurrentCache.h @@ -344,6 +344,7 @@ class ConcurrentCache { } else if (onlyReadFromCache) { return {nullptr, CacheStatus::notInCacheAndNotComputed}; } else if (lockPtr->_inProgress.contains(key)) { + // the result is not cached, but someone else is computing it. // it is important, that we do not immediately call getResult() since // this call blocks and we currently hold a lock. From 389f3f18abaccb62fd723fcf2a76ecc4e0b94b7a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 5 Jun 2024 18:30:32 +0200 Subject: [PATCH 050/133] Rename `resultTable` -> `result` --- src/engine/ExportQueryExecutionTrees.cpp | 67 ++++++++++++------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 38c40ab23c..d2ce85ac1a 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -82,13 +82,12 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, - std::shared_ptr resultTable, + LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { - resultTable->logResultSize(); + result->logResultSize(); auto generator = constructQueryResultToTriples(qet, constructTriples, limitAndOffset, - resultTable, std::move(cancellationHandle)); + result, std::move(cancellationHandle)); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -292,11 +291,11 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - std::shared_ptr resultTable, + std::shared_ptr result, CancellationHandle cancellationHandle) { using nlohmann::json; - 
AD_CORRECTNESS_CHECK(resultTable != nullptr); + AD_CORRECTNESS_CHECK(result != nullptr); LOG(DEBUG) << "Finished computing the query result in the ID space. " "Resolving strings in result...\n"; @@ -307,7 +306,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( std::erase(columns, std::nullopt); - json result; + json resultJson; std::vector selectedVars = selectClause.getSelectedVariablesAsStrings(); // Strip the leading '?' from the variables, it is not part of the SPARQL JSON @@ -317,7 +316,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( var = var.substr(1); } } - result["head"]["vars"] = selectedVars; + resultJson["head"]["vars"] = selectedVars; json bindings = json::array(); @@ -326,8 +325,8 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( LOG(WARN) << "Exporting a SPARQL query where none of the selected " "variables is bound in the query" << std::endl; - result["results"]["bindings"] = json::array(); - return result; + resultJson["results"]["bindings"] = json::array(); + return resultJson; } // Take a string from the vocabulary, deduce the type and @@ -374,14 +373,14 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *resultTable)) { + for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. 
nlohmann::ordered_json binding; for (const auto& column : columns) { const auto& currentId = idTable(rowIndex, column->columnIndex_); const auto& optionalValue = idToStringAndType( - qet.getQec()->getIndex(), currentId, resultTable->localVocab()); + qet.getQec()->getIndex(), currentId, result->localVocab()); if (!optionalValue.has_value()) { continue; } @@ -401,8 +400,8 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( bindings.emplace_back(std::move(binding)); cancellationHandle->throwIfCancelled(); } - result["results"]["bindings"] = std::move(bindings); - return result; + resultJson["results"]["bindings"] = std::move(bindings); + return resultJson; } // _____________________________________________________________________________ @@ -410,15 +409,15 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - std::shared_ptr resultTable, + std::shared_ptr result, CancellationHandle cancellationHandle) { - AD_CORRECTNESS_CHECK(resultTable != nullptr); + AD_CORRECTNESS_CHECK(result != nullptr); LOG(DEBUG) << "Resolving strings for finished binary result...\n"; QueryExecutionTree::ColumnIndicesAndTypes selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); return idTableToQLeverJSONArray(qet, limitAndOffset, selectedColumnIndices, - std::move(resultTable), + std::move(result), std::move(cancellationHandle)); } @@ -440,8 +439,8 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // This call triggers the possibly expensive computation of the query result // unless the result is already cached. - std::shared_ptr resultTable = qet.getResult(); - resultTable->logResultSize(); + std::shared_ptr result = qet.getResult(); + result->logResultSize(); LOG(DEBUG) << "Converting result IDs to their corresponding strings ..." 
<< std::endl; auto selectedColumnIndices = @@ -449,7 +448,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *resultTable)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -477,14 +476,14 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (auto [i, idTable] : getRowIndices(limitAndOffset, *resultTable)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); Id id = idTable(i, val.columnIndex_); auto optionalStringAndType = idToStringAndType( - qet.getQec()->getIndex(), id, resultTable->localVocab(), + qet.getQec()->getIndex(), id, result->localVocab(), escapeFunction); if (optionalStringAndType.has_value()) [[likely]] { co_yield optionalStringAndType.value().first; @@ -584,7 +583,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: selectClause.getSelectedVariablesAsStrings(); // This call triggers the possibly expensive computation of the query result // unless the result is already cached. - std::shared_ptr resultTable = qet.getResult(); + std::shared_ptr result = qet.getResult(); // In the XML format, the variables don't include the question mark. 
auto varsWithoutQuestionMark = std::views::transform( @@ -596,18 +595,18 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: co_yield "\n"; - resultTable->logResultSize(); + result->logResultSize(); auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. - for (auto [i, idTable] : getRowIndices(limitAndOffset, *resultTable)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); Id id = idTable(i, val.columnIndex_); co_yield idToXMLBinding(val.variable_, id, qet.getQec()->getIndex(), - resultTable->localVocab()); + result->localVocab()); } } co_yield "\n "; @@ -625,7 +624,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::constructQueryResultToStream( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, - LimitOffsetClause limitAndOffset, std::shared_ptr resultTable, + LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { static_assert(format == MediaType::octetStream || format == MediaType::csv || format == MediaType::tsv || format == MediaType::sparqlXml); @@ -634,14 +633,14 @@ ExportQueryExecutionTrees::constructQueryResultToStream( } else if constexpr (format == MediaType::sparqlXml) { AD_THROW("XML export is currently not supported for CONSTRUCT queries"); } - resultTable->logResultSize(); + result->logResultSize(); constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? 
'\t' : ','; auto generator = constructQueryResultToTriples(qet, constructTriples, limitAndOffset, - resultTable, std::move(cancellationHandle)); + result, std::move(cancellationHandle)); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -743,11 +742,11 @@ nlohmann::json ExportQueryExecutionTrees::computeSelectQueryResultAsSparqlJSON( AD_THROW( "SPARQL-compliant JSON format is only supported for SELECT queries"); } - std::shared_ptr resultTable = qet.getResult(); - resultTable->logResultSize(); - return selectQueryResultToSparqlJSON( - qet, query.selectClause(), query._limitOffset, std::move(resultTable), - std::move(cancellationHandle)); + std::shared_ptr result = qet.getResult(); + result->logResultSize(); + return selectQueryResultToSparqlJSON(qet, query.selectClause(), + query._limitOffset, std::move(result), + std::move(cancellationHandle)); } // _____________________________________________________________________________ From ba142a04deabdac303620a756dc764ee6e279b7f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 9 Jun 2024 22:04:04 +0200 Subject: [PATCH 051/133] Add correctness check to prevent double move due to race condition --- src/util/CacheableGenerator.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index ed886d1376..f77a2ba4cb 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -256,8 +256,10 @@ class CacheableGenerator { IteratorSentinel end() const noexcept { return IteratorSentinel{}; } cppcoro::generator extractGenerator() && { - std::unique_lock lock{computationStorage_->mutex_}; - cppcoro::generator result{std::move(computationStorage_->generator_)}; + auto pointerCopy = computationStorage_; + AD_CORRECTNESS_CHECK(pointerCopy); + std::unique_lock lock{pointerCopy->mutex_}; + cppcoro::generator 
result{std::move(pointerCopy->generator_)}; computationStorage_.reset(); return result; } From 44562c7a76ea460e9f22f72332ea661e0210db71 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 13 Jun 2024 02:02:03 +0200 Subject: [PATCH 052/133] Start implementing tests for new cache feature and fixing bugs along the way --- src/util/Cache.h | 51 ++++-- test/CacheTest.cpp | 399 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 432 insertions(+), 18 deletions(-) diff --git a/src/util/Cache.h b/src/util/Cache.h index 9664a9e561..b35dbf40b9 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -6,26 +6,23 @@ #pragma once -#include +#include +#include #include #include #include -#include #include #include -#include "./HashMap.h" -#include "PriorityQueue.h" -#include "util/ConstexprUtils.h" +#include "util/HashMap.h" #include "util/MemorySize/MemorySize.h" +#include "util/PriorityQueue.h" #include "util/TypeTraits.h" #include "util/ValueSizeGetters.h" namespace ad_utility { -using std::make_shared; -using std::pair; using std::shared_ptr; using namespace ad_utility::memory_literals; @@ -101,14 +98,13 @@ class FlexibleCache { }; using EmplacedValue = shared_ptr; - // using Entry = pair; using EntryList = PriorityQueue; using AccessMap = MapType; using PinnedMap = MapType; using SizeMap = MapType; - using TryEmplaceResult = pair; + using TryEmplaceResult = std::pair; public: //! Typical constructor. A default value may be added in time. @@ -150,7 +146,7 @@ class FlexibleCache { /// Insert a key-value pair to the cache. Throws an exception if the key is /// already present. If the value is too big for the cache, nothing happens. ValuePtr insert(const Key& key, Value value) { - auto ptr = make_shared(std::move(value)); + auto ptr = std::make_shared(std::move(value)); return insert(key, std::move(ptr)); } @@ -158,7 +154,7 @@ class FlexibleCache { // is already present. 
If the value is too big for the cache, an exception is // thrown. ValuePtr insertPinned(const Key& key, Value value) { - auto ptr = make_shared(std::move(value)); + auto ptr = std::make_shared(std::move(value)); return insertPinned(key, std::move(ptr)); } @@ -242,14 +238,15 @@ class FlexibleCache { MemorySize& variable, bool pinned) { auto newSize = _valueSizeGetter(*(*this)[key]); auto& oldSize = _sizeMap.at(key); - // Overflowing if oldSize > newSize is fine here, the math adds up - // nevertheless. - auto sizeDelta = newSize - oldSize; - if (newSize > oldSize) { - if (_maxSizeSingleEntry >= newSize) { + if (newSize >= oldSize) { + auto sizeDelta = newSize - oldSize; + if (_maxSizeSingleEntry < newSize) { result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; if (removeIfEntryGrewTooBig && !pinned) { erase(key); + } else { + oldSize += sizeDelta; + variable += sizeDelta; } // We don't know how to shrink the size here, so if // `removeIfEntryGrewTooBig` is false, this needs to be handled by the @@ -262,12 +259,19 @@ class FlexibleCache { // We can't fit it in the cache, so remove if not pinned if (!pinned) { erase(key); + } else { + oldSize += sizeDelta; + variable += sizeDelta; } return; } + oldSize += sizeDelta; + variable += sizeDelta; + } else { + auto negativeSizeDelta = oldSize - newSize; + oldSize -= negativeSizeDelta; + variable -= negativeSizeDelta; } - oldSize += sizeDelta; - variable += sizeDelta; makeRoomIfFits(0_B); }; if (containsPinned(key)) { @@ -475,6 +479,17 @@ class FlexibleCache { PinnedMap _pinnedMap; AccessMap _accessMap; SizeMap _sizeMap; + + FRIEND_TEST(LRUCacheTest, + verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased); + + FRIEND_TEST(LRUCacheTest, + verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned); + FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed); + FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned); + FRIEND_TEST(LRUCacheTest, + verifyNonPinnedEntriesAreRemovedToMakeRoomForResize); + 
FRIEND_TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement); }; // Partial instantiation of FlexibleCache using the heap-based priority queue diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index d28e992016..2410fed085 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -139,4 +139,403 @@ TEST(LRUCacheTest, testDecreasingCapacity) { ASSERT_FALSE(cache["3"]); ASSERT_FALSE(cache["4"]); } + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyTransformValueWorksForNonPinnedValues) { + LRUCache> cache{2, 3_B}; + cache.insert("1", "x"); + + ASSERT_EQ(cache.nonPinnedSize(), 1_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + + cache.transformValue("1", + [](const std::string& value) { return value + "a"; }); + + ASSERT_EQ(cache.nonPinnedSize(), 2_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + ASSERT_TRUE(cache.contains("1")); + ASSERT_EQ(*cache["1"], "xa"); + + cache.insert("2", "y"); + + ASSERT_EQ(cache.nonPinnedSize(), 3_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + + cache.transformValue("1", + [](const std::string& value) { return value + "b"; }); + + ASSERT_EQ(cache.nonPinnedSize(), 3_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + + ASSERT_TRUE(cache.contains("1")); + ASSERT_FALSE(cache.contains("2")); + ASSERT_EQ(*cache["1"], "xab"); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyTransformValueWorksForPinnedValues) { + LRUCache> cache{1}; + cache.insertPinned("1", "x"); + + ASSERT_EQ(cache.nonPinnedSize(), 0_B); + ASSERT_EQ(cache.pinnedSize(), 1_B); + + cache.transformValue("1", + [](const std::string& value) { return value + "a"; }); + + ASSERT_EQ(cache.nonPinnedSize(), 0_B); + ASSERT_EQ(cache.pinnedSize(), 2_B); + ASSERT_TRUE(cache.contains("1")); + ASSERT_EQ(*cache["1"], "xa"); + + cache.insert("2", "y"); + + ASSERT_EQ(cache.nonPinnedSize(), 1_B); + ASSERT_EQ(cache.pinnedSize(), 2_B); + ASSERT_TRUE(cache.contains("1")); + 
ASSERT_TRUE(cache.contains("2")); + + cache.transformValue("1", + [](const std::string& value) { return value + "b"; }); + + ASSERT_EQ(cache.nonPinnedSize(), 0_B); + ASSERT_EQ(cache.pinnedSize(), 3_B); + ASSERT_TRUE(cache.contains("1")); + ASSERT_FALSE(cache.contains("2")); + + ASSERT_EQ(*cache["1"], "xab"); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyTransformValueWorksIsNoOpForNonExistantValues) { + LRUCache> cache{1}; + + ASSERT_EQ(cache.nonPinnedSize(), 0_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + + cache.transformValue("1", + [](const std::string&) { return std::string{"a"}; }); + + ASSERT_EQ(cache.nonPinnedSize(), 0_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + ASSERT_FALSE(cache.contains("1")); + + cache.insert("2", "y"); + + ASSERT_EQ(cache.nonPinnedSize(), 1_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + ASSERT_TRUE(cache.contains("2")); + + cache.transformValue("1", + [](const std::string&) { return std::string{"a"}; }); + + ASSERT_EQ(cache.nonPinnedSize(), 1_B); + ASSERT_EQ(cache.pinnedSize(), 0_B); + ASSERT_FALSE(cache.contains("1")); + ASSERT_TRUE(cache.contains("2")); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased) { + using Vec = std::vector; + auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; + }; + LRUCache>, decltype(vectorSizeGetter)> + cache{1}; + + auto vecA = std::make_shared(); + + cache.insert(0, vecA); + + ASSERT_EQ(cache._totalSizeNonPinned, 0_B); + vecA->push_back(0); + + // Cache does was not notified about the size change + ASSERT_EQ(cache._totalSizeNonPinned, 0_B); + + cache.erase(0); + + // Cache should not underflow + ASSERT_EQ(cache._totalSizeNonPinned, 0_B); + + cache.insert(0, vecA); + + ASSERT_EQ(cache._totalSizeNonPinned, 4_B); + vecA->clear(); + + // Cache does was not notified about the size change + 
ASSERT_EQ(cache._totalSizeNonPinned, 4_B); + + cache.erase(0); + + // Cache correctly remove size, even though the vector is empty by now. + ASSERT_EQ(cache._totalSizeNonPinned, 0_B); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, + verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned) { + using Vec = std::vector; + auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; + }; + LRUCache>, decltype(vectorSizeGetter)> + cache{1}; + + auto vecA = std::make_shared(); + + cache.insertPinned(0, vecA); + + ASSERT_EQ(cache._totalSizePinned, 0_B); + vecA->push_back(0); + + // Cache does was not notified about the size change + ASSERT_EQ(cache._totalSizePinned, 0_B); + + cache.erase(0); + + // Cache should not underflow + ASSERT_EQ(cache._totalSizePinned, 0_B); + + cache.insertPinned(0, vecA); + + ASSERT_EQ(cache._totalSizePinned, 4_B); + vecA->clear(); + + // Cache does was not notified about the size change + ASSERT_EQ(cache._totalSizePinned, 4_B); + + cache.erase(0); + + // Cache correctly remove size, even though the vector is empty by now. 
+ ASSERT_EQ(cache._totalSizePinned, 0_B); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { + using enum ResizeResult; + using Vec = std::vector; + auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; + }; + LRUCache>, decltype(vectorSizeGetter)> + cache{3, 8_B, 4_B}; + + auto vecA = std::make_shared(); + auto vecB = std::make_shared(1); + + cache.insert(0, vecA); + cache.insert(1, vecB); + + ASSERT_EQ(cache._totalSizeNonPinned, 4_B); + + vecA->push_back(0); + vecB->push_back(1); + + // Cache does was not notified about the size change + ASSERT_EQ(cache._totalSizeNonPinned, 4_B); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); + + ASSERT_EQ(cache._totalSizeNonPinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + vecA->push_back(1); + + ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); + + ASSERT_EQ(cache._totalSizeNonPinned, 12_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + vecA->resize(1); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); + + ASSERT_EQ(cache._totalSizeNonPinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + auto vecC = std::make_shared(0); + cache.insert(2, vecC); + vecB->resize(1); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(1, false)); + + ASSERT_EQ(cache._totalSizeNonPinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_TRUE(cache.contains(2)); + + // Set to high value to avoid getting limited by this. 
+ cache.setMaxSizeSingleEntry(64_B); + vecC->push_back(0); + vecC->push_back(1); + vecC->push_back(2); + ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, false)); + + ASSERT_EQ(cache._totalSizeNonPinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_FALSE(cache.contains(2)); + + cache.setMaxSizeSingleEntry(4_B); + vecA->push_back(1); + + ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); + + ASSERT_EQ(cache._totalSizeNonPinned, 4_B); + ASSERT_FALSE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_FALSE(cache.contains(2)); + + vecB->clear(); + cache.erase(1); + + ASSERT_EQ(cache._totalSizeNonPinned, 0_B); + ASSERT_FALSE(cache.contains(0)); + ASSERT_FALSE(cache.contains(1)); + ASSERT_FALSE(cache.contains(2)); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { + using enum ResizeResult; + using Vec = std::vector; + auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; + }; + LRUCache>, decltype(vectorSizeGetter)> + cache{3, 8_B, 4_B}; + + auto vecA = std::make_shared(); + auto vecB = std::make_shared(1); + + cache.insertPinned(0, vecA); + cache.insertPinned(1, vecB); + + ASSERT_EQ(cache._totalSizePinned, 4_B); + + vecA->push_back(0); + vecB->push_back(1); + + // Cache does was not notified about the size change + ASSERT_EQ(cache._totalSizePinned, 4_B); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); + + ASSERT_EQ(cache._totalSizePinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + vecA->push_back(1); + + ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); + ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); + + ASSERT_EQ(cache._totalSizePinned, 12_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + vecA->resize(1); + + ASSERT_EQ(FITS_IN_CACHE, 
cache.recomputeSize(0, false)); + + ASSERT_EQ(cache._totalSizePinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + + auto vecC = std::make_shared(0); + cache.insertPinned(2, vecC); + vecB->resize(1); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(1, false)); + + ASSERT_EQ(cache._totalSizePinned, 8_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_TRUE(cache.contains(2)); + + // Set to high value to avoid getting limited by this. + cache.setMaxSizeSingleEntry(64_B); + vecC->push_back(0); + vecC->push_back(1); + vecC->push_back(2); + ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, true)); + + ASSERT_EQ(cache._totalSizePinned, 20_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_TRUE(cache.contains(2)); + cache.erase(2); + + cache.setMaxSizeSingleEntry(4_B); + vecA->push_back(1); + + ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); + + ASSERT_EQ(cache._totalSizePinned, 12_B); + ASSERT_TRUE(cache.contains(0)); + ASSERT_TRUE(cache.contains(1)); + ASSERT_FALSE(cache.contains(2)); + cache.erase(0); + + vecB->clear(); + cache.erase(1); + + ASSERT_EQ(cache._totalSizePinned, 0_B); + ASSERT_FALSE(cache.contains(0)); + ASSERT_FALSE(cache.contains(1)); + ASSERT_FALSE(cache.contains(2)); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize) { + using enum ResizeResult; + using Vec = std::vector; + auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; + }; + LRUCache>, decltype(vectorSizeGetter)> + cache{3, 8_B, 4_B}; + + auto vecA = std::make_shared(1); + auto vecB = std::make_shared(1); + auto vecC = std::make_shared(0); + + cache.insertPinned(0, vecA); + cache.insert(1, vecB); + cache.insert(2, vecC); + + vecC->push_back(0); + + ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(2, true)); + ASSERT_TRUE(cache.contains(0)); 
+ ASSERT_FALSE(cache.contains(1)); + ASSERT_TRUE(cache.contains(2)); +} + +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement) { + LRUCache> cache{1}; + cache.insert("1", "a"); + + cache.recomputeSize("2", false); + + EXPECT_TRUE(cache.contains("1")); + EXPECT_FALSE(cache.contains("2")); + + cache.recomputeSize("2", true); + + EXPECT_TRUE(cache.contains("1")); + EXPECT_FALSE(cache.contains("2")); +} + +// TODO Add test to check if for EXCEEDS_MAX_SIZE the pinned entries +// are correctly taken into consideration +// TODO Add test and update Cache code to signal EXCEEDS_MAX_SIZE and +// EXCEEDS_SINGLE_ENTRY_SIZE if after reduction in size entries are still too +// large. } // namespace ad_utility From 0f3a59a94a8c496861b4c63e3ff502dc223666b6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 13 Jun 2024 02:04:16 +0200 Subject: [PATCH 053/133] Some Test cleanup --- test/CacheTest.cpp | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index 2410fed085..1ba2b473c9 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -5,7 +5,6 @@ #include #include -#include #include "util/Cache.h" #include "util/DefaultValueSizeGetter.h" @@ -13,6 +12,12 @@ using std::string; using namespace ad_utility::memory_literals; +using enum ad_utility::ResizeResult; + +using Vec = std::vector; +auto vectorSizeGetter = [](const auto& pointer) { + return pointer->size() * sizeof(int) * 1_B; +}; // first some simple Tests for the general cache interface TEST(FlexibleCacheTest, Simple) { @@ -237,10 +242,6 @@ TEST(LRUCacheTest, verifyTransformValueWorksIsNoOpForNonExistantValues) { // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased) { - using Vec = std::vector; - auto 
vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; - }; LRUCache>, decltype(vectorSizeGetter)> cache{1}; @@ -276,10 +277,6 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased) { // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned) { - using Vec = std::vector; - auto vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; - }; LRUCache>, decltype(vectorSizeGetter)> cache{1}; @@ -314,11 +311,6 @@ TEST(LRUCacheTest, // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { - using enum ResizeResult; - using Vec = std::vector; - auto vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; - }; LRUCache>, decltype(vectorSizeGetter)> cache{3, 8_B, 4_B}; @@ -402,11 +394,6 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { - using enum ResizeResult; - using Vec = std::vector; - auto vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; - }; LRUCache>, decltype(vectorSizeGetter)> cache{3, 8_B, 4_B}; @@ -493,11 +480,6 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { // _____________________________________________________________________________ TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize) { - using enum ResizeResult; - using Vec = std::vector; - auto vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; - }; LRUCache>, decltype(vectorSizeGetter)> cache{3, 8_B, 4_B}; From d226849b415863b582753fb086e15fff8536fe94 Mon Sep 17 00:00:00 2001 From: RobinTF 
<83676088+RobinTF@users.noreply.github.com> Date: Thu, 13 Jun 2024 02:30:15 +0200 Subject: [PATCH 054/133] Mark variable as maybe_unused --- test/CacheTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index 1ba2b473c9..3fa12b2e3a 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -15,7 +15,7 @@ using namespace ad_utility::memory_literals; using enum ad_utility::ResizeResult; using Vec = std::vector; -auto vectorSizeGetter = [](const auto& pointer) { +[[maybe_unused]] auto vectorSizeGetter = [](const auto& pointer) { return pointer->size() * sizeof(int) * 1_B; }; From cde135a2cf376545b9f12d4917385c5e409ca1eb Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 14 Jun 2024 00:43:11 +0200 Subject: [PATCH 055/133] Restructure recomputeSize a bit to avoid unwanted behaviour --- src/util/Cache.h | 53 ++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/util/Cache.h b/src/util/Cache.h index b35dbf40b9..051633bee6 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -238,41 +238,36 @@ class FlexibleCache { MemorySize& variable, bool pinned) { auto newSize = _valueSizeGetter(*(*this)[key]); auto& oldSize = _sizeMap.at(key); - if (newSize >= oldSize) { - auto sizeDelta = newSize - oldSize; - if (_maxSizeSingleEntry < newSize) { - result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; - if (removeIfEntryGrewTooBig && !pinned) { - erase(key); - } else { - oldSize += sizeDelta; - variable += sizeDelta; - } - // We don't know how to shrink the size here, so if - // `removeIfEntryGrewTooBig` is false, this needs to be handled by the - // caller. + bool needsShrinking = newSize != oldSize; + MemorySize pinnedOffset = pinned ? 
0_B : _totalSizePinned; + if (_maxSizeSingleEntry < newSize) { + result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; + if (removeIfEntryGrewTooBig && !pinned) { + erase(key); return; } - MemorySize pinnedOffset = pinned ? 0_B : _totalSizePinned; - if (_maxSize - pinnedOffset < newSize) { - result = ResizeResult::EXCEEDS_MAX_SIZE; - // We can't fit it in the cache, so remove if not pinned - if (!pinned) { - erase(key); - } else { - oldSize += sizeDelta; - variable += sizeDelta; - } + // We don't know how to shrink the size here, so if + // `removeIfEntryGrewTooBig` is false, this needs to be handled by the + // caller. + needsShrinking = false; + } else if (_maxSize - std::min(pinnedOffset, _maxSize) < newSize) { + result = ResizeResult::EXCEEDS_MAX_SIZE; + // We can't fit it in the cache, so remove if not pinned + if (!pinned) { + erase(key); return; } - oldSize += sizeDelta; - variable += sizeDelta; + } + + if (newSize >= oldSize) { + variable += newSize - oldSize; } else { - auto negativeSizeDelta = oldSize - newSize; - oldSize -= negativeSizeDelta; - variable -= negativeSizeDelta; + variable -= oldSize - newSize; + } + oldSize = newSize; + if (needsShrinking && _totalSizePinned <= _maxSize) { + makeRoomIfFits(0_B); } - makeRoomIfFits(0_B); }; if (containsPinned(key)) { applySizeDifference(_totalSizePinned, true); From cf6b4c96ec87dd6bab19d8cbb247b52c09f08073 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 14 Jun 2024 20:50:56 +0200 Subject: [PATCH 056/133] Add remaining cache tests --- src/util/Cache.h | 2 +- test/CacheTest.cpp | 98 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 77 insertions(+), 23 deletions(-) diff --git a/src/util/Cache.h b/src/util/Cache.h index 051633bee6..f45dca269b 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -238,7 +238,7 @@ class FlexibleCache { MemorySize& variable, bool pinned) { auto newSize = _valueSizeGetter(*(*this)[key]); auto& oldSize = _sizeMap.at(key); - 
bool needsShrinking = newSize != oldSize; + bool needsShrinking = true; MemorySize pinnedOffset = pinned ? 0_B : _totalSizePinned; if (_maxSizeSingleEntry < newSize) { result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index 3fa12b2e3a..894870fe3d 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -322,8 +322,8 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - vecA->push_back(0); - vecB->push_back(1); + vecA->resize(1); + vecB->resize(2); // Cache does was not notified about the size change ASSERT_EQ(cache._totalSizeNonPinned, 4_B); @@ -334,7 +334,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { ASSERT_TRUE(cache.contains(0)); ASSERT_TRUE(cache.contains(1)); - vecA->push_back(1); + vecA->resize(2); ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); @@ -363,9 +363,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { // Set to high value to avoid getting limited by this. 
cache.setMaxSizeSingleEntry(64_B); - vecC->push_back(0); - vecC->push_back(1); - vecC->push_back(2); + vecC->resize(3); ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, false)); ASSERT_EQ(cache._totalSizeNonPinned, 8_B); @@ -374,7 +372,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { ASSERT_FALSE(cache.contains(2)); cache.setMaxSizeSingleEntry(4_B); - vecA->push_back(1); + vecA->resize(2); ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); @@ -405,8 +403,8 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { ASSERT_EQ(cache._totalSizePinned, 4_B); - vecA->push_back(0); - vecB->push_back(1); + vecA->resize(1); + vecB->resize(2); // Cache does was not notified about the size change ASSERT_EQ(cache._totalSizePinned, 4_B); @@ -417,7 +415,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { ASSERT_TRUE(cache.contains(0)); ASSERT_TRUE(cache.contains(1)); - vecA->push_back(1); + vecA->resize(2); ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); @@ -447,9 +445,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { // Set to high value to avoid getting limited by this. 
cache.setMaxSizeSingleEntry(64_B); - vecC->push_back(0); - vecC->push_back(1); - vecC->push_back(2); + vecC->resize(3); ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, true)); ASSERT_EQ(cache._totalSizePinned, 20_B); @@ -459,7 +455,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { cache.erase(2); cache.setMaxSizeSingleEntry(4_B); - vecA->push_back(1); + vecA->resize(2); ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); @@ -491,7 +487,7 @@ TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize) { cache.insert(1, vecB); cache.insert(2, vecC); - vecC->push_back(0); + vecC->resize(1); ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(2, true)); ASSERT_TRUE(cache.contains(0)); @@ -504,20 +500,78 @@ TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement) { LRUCache> cache{1}; cache.insert("1", "a"); - cache.recomputeSize("2", false); + EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize("2", false)); EXPECT_TRUE(cache.contains("1")); EXPECT_FALSE(cache.contains("2")); - cache.recomputeSize("2", true); + EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize("2", true)); EXPECT_TRUE(cache.contains("1")); EXPECT_FALSE(cache.contains("2")); } -// TODO Add test to check if for EXCEEDS_MAX_SIZE the pinned entries -// are correctly taken into consideration -// TODO Add test and update Cache code to signal EXCEEDS_MAX_SIZE and -// EXCEEDS_SINGLE_ENTRY_SIZE if after reduction in size entries are still too -// large. 
+TEST(LRUCacheTest, verifyRecomputeDoesNoticeExceedingSizeOnShrink) { + LRUCache>, decltype(vectorSizeGetter)> + cache{3, 12_B, 8_B}; + + auto vecA = std::make_shared(2); + auto vecB = std::make_shared(1); + auto vecC = std::make_shared(0); + + cache.insert(0, vecA); + cache.insert(1, vecB); + cache.insert(2, vecC); + + vecC->resize(4); + + EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); + + EXPECT_TRUE(cache.contains(0)); + EXPECT_TRUE(cache.contains(1)); + EXPECT_TRUE(cache.contains(2)); + + vecC->resize(3); + + EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); + + EXPECT_TRUE(cache.contains(0)); + EXPECT_TRUE(cache.contains(1)); + EXPECT_TRUE(cache.contains(2)); + + vecC->resize(2); + + EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize(2, false)); + + EXPECT_FALSE(cache.contains(0)); + EXPECT_TRUE(cache.contains(1)); + EXPECT_TRUE(cache.contains(2)); + + vecC->resize(5); + EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); + + vecC->resize(4); + cache.setMaxSizeSingleEntry(16_B); + + EXPECT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, false)); + + EXPECT_FALSE(cache.contains(0)); + EXPECT_TRUE(cache.contains(1)); + EXPECT_FALSE(cache.contains(2)); +} + +TEST(LRUCacheTest, verifyRecomputeDoesConsiderPinnedSizeForMaxSize) { + LRUCache>, decltype(vectorSizeGetter)> + cache{3, 8_B, 8_B}; + + auto vecA = std::make_shared(2); + auto vecB = std::make_shared(0); + + cache.insertPinned(0, vecA); + cache.insert(1, vecB); + + vecB->resize(1); + + EXPECT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(1, false)); +} } // namespace ad_utility From 0c589e3cbdf7ac6f46907f33f141e5d8b1ccc011 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 14 Jun 2024 22:29:19 +0200 Subject: [PATCH 057/133] Add tests for `IteratorWrapper` --- src/util/IteratorWrapper.h | 18 +++++-------- test/CMakeLists.txt | 42 +++++++++++++++-------------- test/IteratorWrapperTest.cpp | 52 
++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 32 deletions(-) create mode 100644 test/IteratorWrapperTest.cpp diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h index 6a9e4fc9ea..e6073697f7 100644 --- a/src/util/IteratorWrapper.h +++ b/src/util/IteratorWrapper.h @@ -9,27 +9,21 @@ namespace ad_utility { -template +template class IteratorWrapper { - OriginalIterator& iterator_; + OriginalIterable& iterable_; std::tuple args_; public: - explicit IteratorWrapper(OriginalIterator& iterator, Args... args) - : iterator_{iterator}, args_{std::move(args)...} {} + explicit IteratorWrapper(OriginalIterable& iterator, Args... args) + : iterable_{iterator}, args_{std::move(args)...} {} auto begin() { - return std::apply([this](auto... args) { return iterator_.begin(args...); }, + return std::apply([this](auto... args) { return iterable_.begin(args...); }, args_); } - auto end() { return iterator_.end(); } - - auto& operator++() { return iterator_++; } - - auto& operator*() { return *iterator_; } - - auto operator->() { return std::addressof(operator*()); } + auto end() { return iterable_.end(); } }; }; // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1aace8bca3..11c02b7885 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,13 +6,13 @@ add_subdirectory(util) # Link binary ${basename} against `gmock_main`, the threading library, the # general test utilities and all libraries that are specified as additional # arguments. 
-function (linkTest basename) +function(linkTest basename) qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the executable ${basename} that is compiled from the source file # "${basename}".cpp -function (addTest basename) +function(addTest basename) add_executable(${basename} "${basename}.cpp") endfunction() @@ -43,23 +43,23 @@ if (SINGLE_TEST_BINARY) qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL TRUE) -else() +else () message(STATUS "The tests are split over multiple binaries") -endif() +endif () # Usage: `addAndLinkTest(basename, [additionalLibraries...]` # Add a GTest/GMock test case that is called `basename` and compiled from a file called # `basename.cpp`. All tests are linked against `gmock_main` and the threading library. # additional libraries against which the test case has to be linked can be specified as # additional arguments after the `basename` function(addLinkAndDiscoverTest basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else() - addTest(${basename}) - linkAndDiscoverTest(${basename} ${ARGN}) - endif() + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else () + addTest(${basename}) + linkAndDiscoverTest(${basename} ${ARGN}) + endif () endfunction() @@ -68,13 +68,13 @@ endfunction() # (without any of the other test cases running in parallel). This can be # required e.g. if several tests cases write to the same file. 
function(addLinkAndDiscoverTestSerial basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else() - addTest(${basename}) - linkAndDiscoverTestSerial(${basename} ${ARGN}) - endif() + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else () + addTest(${basename}) + linkAndDiscoverTestSerial(${basename} ${ARGN}) + endif () endfunction() # Only compile and link the test, but do not run it. @@ -142,10 +142,10 @@ addLinkAndDiscoverTest(UnionTest engine) if (SINGLE_TEST_BINARY) target_sources(QLeverAllUnitTestsMain PUBLIC TokenTest.cpp TokenTestCtreHelper.cpp) qlever_target_link_libraries(QLeverAllUnitTestsMain parser re2 util) -else() +else () add_executable(TokenTest TokenTest.cpp TokenTestCtreHelper.cpp) linkAndDiscoverTest(TokenTest parser re2 util) -endif() +endif () addLinkAndDiscoverTestSerial(TurtleParserTest parser re2) @@ -391,3 +391,5 @@ addLinkAndDiscoverTest(JThreadTest) addLinkAndDiscoverTest(ChunkedForLoopTest) addLinkAndDiscoverTest(FsstCompressorTest fsst) + +addLinkAndDiscoverTest(IteratorWrapperTest) diff --git a/test/IteratorWrapperTest.cpp b/test/IteratorWrapperTest.cpp new file mode 100644 index 0000000000..02a16ea75a --- /dev/null +++ b/test/IteratorWrapperTest.cpp @@ -0,0 +1,52 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Robin Textor-Falconi + +#include + +#include + +#include "util/IteratorWrapper.h" + +using ad_utility::IteratorWrapper; + +TEST(IteratorWrapper, transparentWrapper) { + std::vector vec{1, 2, 3}; + int numIterations = 0; + for (auto value : IteratorWrapper{vec}) { + EXPECT_EQ(value, numIterations + 1); + numIterations++; + } + EXPECT_EQ(numIterations, 3); +} + +// _____________________________________________________________________________ + +struct TestIterable { + std::vector vec_{1, 2, 3}; + bool value1_ = false; + int value2_ = 0; + std::string value3_ = ""; + + auto begin(bool value1, int value2, std::string value3) { + value1_ = value1; + value2_ = value2; + value3_ = std::move(value3); + return vec_.begin(); + } + + auto end() { return vec_.end(); } +}; + +TEST(IteratorWrapper, verifyArgumentsArePassed) { + TestIterable testIterable; + int numIterations = 0; + for (auto value : IteratorWrapper{testIterable, true, 42, "Hi"}) { + EXPECT_EQ(value, numIterations + 1); + numIterations++; + } + EXPECT_EQ(numIterations, 3); + EXPECT_TRUE(testIterable.value1_); + EXPECT_EQ(testIterable.value2_, 42); + EXPECT_EQ(testIterable.value3_, "Hi"); +} From c4656852f2e75fea267a871f91552e6a3345b1d1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 15 Jun 2024 16:55:22 +0200 Subject: [PATCH 058/133] Fix line endings --- test/IteratorWrapperTest.cpp | 104 +++++++++++++++++------------------ 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/test/IteratorWrapperTest.cpp b/test/IteratorWrapperTest.cpp index 02a16ea75a..1637102eb9 100644 --- a/test/IteratorWrapperTest.cpp +++ b/test/IteratorWrapperTest.cpp @@ -1,52 +1,52 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Robin Textor-Falconi - -#include - -#include - -#include "util/IteratorWrapper.h" - -using ad_utility::IteratorWrapper; - -TEST(IteratorWrapper, transparentWrapper) { - std::vector vec{1, 2, 3}; - int numIterations = 0; - for (auto value : IteratorWrapper{vec}) { - EXPECT_EQ(value, numIterations + 1); - numIterations++; - } - EXPECT_EQ(numIterations, 3); -} - -// _____________________________________________________________________________ - -struct TestIterable { - std::vector vec_{1, 2, 3}; - bool value1_ = false; - int value2_ = 0; - std::string value3_ = ""; - - auto begin(bool value1, int value2, std::string value3) { - value1_ = value1; - value2_ = value2; - value3_ = std::move(value3); - return vec_.begin(); - } - - auto end() { return vec_.end(); } -}; - -TEST(IteratorWrapper, verifyArgumentsArePassed) { - TestIterable testIterable; - int numIterations = 0; - for (auto value : IteratorWrapper{testIterable, true, 42, "Hi"}) { - EXPECT_EQ(value, numIterations + 1); - numIterations++; - } - EXPECT_EQ(numIterations, 3); - EXPECT_TRUE(testIterable.value1_); - EXPECT_EQ(testIterable.value2_, 42); - EXPECT_EQ(testIterable.value3_, "Hi"); -} +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Robin Textor-Falconi + +#include + +#include + +#include "util/IteratorWrapper.h" + +using ad_utility::IteratorWrapper; + +TEST(IteratorWrapper, transparentWrapper) { + std::vector vec{1, 2, 3}; + int numIterations = 0; + for (auto value : IteratorWrapper{vec}) { + EXPECT_EQ(value, numIterations + 1); + numIterations++; + } + EXPECT_EQ(numIterations, 3); +} + +// _____________________________________________________________________________ + +struct TestIterable { + std::vector vec_{1, 2, 3}; + bool value1_ = false; + int value2_ = 0; + std::string value3_ = ""; + + auto begin(bool value1, int value2, std::string value3) { + value1_ = value1; + value2_ = value2; + value3_ = std::move(value3); + return vec_.begin(); + } + + auto end() { return vec_.end(); } +}; + +TEST(IteratorWrapper, verifyArgumentsArePassed) { + TestIterable testIterable; + int numIterations = 0; + for (auto value : IteratorWrapper{testIterable, true, 42, "Hi"}) { + EXPECT_EQ(value, numIterations + 1); + numIterations++; + } + EXPECT_EQ(numIterations, 3); + EXPECT_TRUE(testIterable.value1_); + EXPECT_EQ(testIterable.value2_, 42); + EXPECT_EQ(testIterable.value3_, "Hi"); +} From 93c23602e36db20019c33f7907ab92e530be6727 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 29 Jun 2024 19:48:00 +0200 Subject: [PATCH 059/133] Add tests for CacheableGenerator --- src/util/CacheableGenerator.h | 2 +- test/CMakeLists.txt | 2 + test/CacheableGeneratorTest.cpp | 565 ++++++++++++++++++++++++++++++++ 3 files changed, 568 insertions(+), 1 deletion(-) create mode 100644 test/CacheableGeneratorTest.cpp diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index f77a2ba4cb..c47ed28bb4 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -169,7 +169,7 @@ class CacheableGenerator { if (onSizeChanged_) { bool isShrinkable = i < maxBound - 1; if (onSizeChanged_(isShrinkable)) { - AD_CONTRACT_CHECK(!isShrinkable); + 
AD_CONTRACT_CHECK(isShrinkable); } else { break; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 40a5610ab6..57145f5a18 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -395,3 +395,5 @@ addLinkAndDiscoverTest(ChunkedForLoopTest) addLinkAndDiscoverTest(FsstCompressorTest fsst) addLinkAndDiscoverTest(IteratorWrapperTest) + +addLinkAndDiscoverTest(CacheableGeneratorTest) diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp new file mode 100644 index 0000000000..df78d5afa5 --- /dev/null +++ b/test/CacheableGeneratorTest.cpp @@ -0,0 +1,565 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#include + +#include + +#include "util/CacheableGenerator.h" +#include "util/Generator.h" +#include "util/jthread.h" + +using ad_utility::CacheableGenerator; +using cppcoro::generator; +using namespace std::chrono_literals; + +generator testGenerator(uint32_t range) { + for (uint32_t i = 0; i < range; i++) { + co_yield i; + } +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, allowsMultiConsumption) { + CacheableGenerator generator{testGenerator(3)}; + + auto iterator1 = generator.begin(true); + + ASSERT_NE(iterator1, generator.end()); + EXPECT_EQ(*iterator1, 0); + ++iterator1; + + ASSERT_NE(iterator1, generator.end()); + EXPECT_EQ(*iterator1, 1); + ++iterator1; + + ASSERT_NE(iterator1, generator.end()); + EXPECT_EQ(*iterator1, 2); + ++iterator1; + + EXPECT_EQ(iterator1, generator.end()); + + auto iterator2 = generator.begin(false); + + ASSERT_NE(iterator2, generator.end()); + EXPECT_EQ(*iterator2, 0); + ++iterator2; + + ASSERT_NE(iterator2, generator.end()); + EXPECT_EQ(*iterator2, 1); + ++iterator2; + + ASSERT_NE(iterator2, generator.end()); + EXPECT_EQ(*iterator2, 2); + ++iterator2; + EXPECT_EQ(iterator2, generator.end()); +} + +// 
_____________________________________________________________________________ +TEST(CacheableGenerator, masterBlocksSlaves) { + CacheableGenerator generator{testGenerator(3)}; + + // Verify slave is not blocked indefinitely if master has not been started yet + EXPECT_THROW(generator.begin(false), ad_utility::Exception); + + auto masterIterator = generator.begin(true); + std::mutex counterMutex; + std::condition_variable cv; + std::atomic_int counter = 0; + uint32_t proceedStage = 0; + + ad_utility::JThread thread1{[&]() { + auto iterator = generator.begin(false); + + ASSERT_NE(iterator, generator.end()); + { + std::lock_guard guard{counterMutex}; + EXPECT_EQ(counter, 0); + proceedStage = 1; + } + cv.notify_all(); + + EXPECT_EQ(*iterator, 0); + ++iterator; + + ASSERT_NE(iterator, generator.end()); + { + std::lock_guard guard{counterMutex}; + EXPECT_EQ(counter, 1); + proceedStage = 2; + } + cv.notify_all(); + + EXPECT_EQ(*iterator, 1); + ++iterator; + + ASSERT_NE(iterator, generator.end()); + { + std::lock_guard guard{counterMutex}; + EXPECT_EQ(counter, 2); + proceedStage = 3; + } + cv.notify_all(); + + EXPECT_EQ(*iterator, 2); + ++iterator; + + EXPECT_EQ(iterator, generator.end()); + { + std::lock_guard guard{counterMutex}; + EXPECT_EQ(counter, 3); + } + }}; + + ad_utility::JThread thread2{[&]() { + auto iterator = generator.begin(false); + + ASSERT_NE(iterator, generator.end()); + EXPECT_GE(counter, 0); + + EXPECT_EQ(*iterator, 0); + ++iterator; + + ASSERT_NE(iterator, generator.end()); + EXPECT_GE(counter, 1); + + EXPECT_EQ(*iterator, 1); + ++iterator; + + ASSERT_NE(iterator, generator.end()); + EXPECT_GE(counter, 2); + + EXPECT_EQ(*iterator, 2); + ++iterator; + + EXPECT_EQ(iterator, generator.end()); + EXPECT_GE(counter, 3); + }}; + + EXPECT_EQ(*masterIterator, 0); + + { + std::unique_lock guard{counterMutex}; + cv.wait(guard, [&]() { return proceedStage == 1; }); + ++counter; + ++masterIterator; + } + ASSERT_NE(masterIterator, generator.end()); + + 
EXPECT_EQ(*masterIterator, 1); + { + std::unique_lock guard{counterMutex}; + cv.wait(guard, [&]() { return proceedStage == 2; }); + ++counter; + ++masterIterator; + } + ASSERT_NE(masterIterator, generator.end()); + + EXPECT_EQ(*masterIterator, 2); + { + std::unique_lock guard{counterMutex}; + cv.wait(guard, [&]() { return proceedStage == 3; }); + ++counter; + ++masterIterator; + } + EXPECT_EQ(masterIterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { + CacheableGenerator generator{testGenerator(3)}; + + (void)generator.begin(true); + + auto iterator1 = generator.begin(false); + auto iterator2 = generator.begin(false); + + ASSERT_NE(iterator1, generator.end()); + ASSERT_NE(iterator2, generator.end()); + + EXPECT_EQ(*iterator1, 0); + EXPECT_EQ(*iterator2, 0); + + ++iterator1; + ASSERT_NE(iterator1, generator.end()); + EXPECT_EQ(*iterator1, 1); + + ++iterator2; + ASSERT_NE(iterator2, generator.end()); + EXPECT_EQ(*iterator2, 1); + + ++iterator2; + ASSERT_NE(iterator2, generator.end()); + EXPECT_EQ(*iterator2, 2); + + ++iterator1; + ASSERT_NE(iterator1, generator.end()); + EXPECT_EQ(*iterator1, 2); + + ++iterator1; + EXPECT_EQ(iterator1, generator.end()); + + ++iterator2; + EXPECT_EQ(iterator2, generator.end()); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyExtractGeneratorGivesBackOriginal) { + CacheableGenerator generator{testGenerator(2)}; + + { + auto iterator = generator.begin(true); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(*iterator, 0); + } + + auto innerGenerator = std::move(generator).extractGenerator(); + auto iterator = innerGenerator.begin(); + ASSERT_NE(iterator, innerGenerator.end()); + EXPECT_EQ(*iterator, 1); + + ++iterator; + EXPECT_EQ(iterator, innerGenerator.end()); +} + +// 
_____________________________________________________________________________ +TEST(CacheableGenerator, verifyOnGeneratorFinishedIsCalled) { + CacheableGenerator generator{testGenerator(1)}; + + bool flag = false; + + generator.setOnGeneratorFinished([&](bool value) { + flag = true; + EXPECT_TRUE(value); + }); + auto iterator = generator.begin(true); + ASSERT_NE(iterator, generator.end()); + + EXPECT_FALSE(flag); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); + + EXPECT_TRUE(flag); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, + verifyOnGeneratorFinishedIsCalledCorrectlyWhenExpired) { + CacheableGenerator generator{testGenerator(2)}; + + bool flag = false; + + generator.setOnGeneratorFinished([&](bool value) { + flag = true; + EXPECT_FALSE(value); + }); + generator.setOnSizeChanged(std::identity{}); + + auto iterator = generator.begin(true); + ASSERT_NE(iterator, generator.end()); + + EXPECT_FALSE(flag); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + + EXPECT_FALSE(flag); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); + + EXPECT_TRUE(flag); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyOnGeneratorFinishedIsCalledWhenEmpty) { + CacheableGenerator generator{testGenerator(0)}; + + bool flag = false; + + generator.setOnGeneratorFinished([&](bool value) { + flag = true; + EXPECT_TRUE(value); + }); + auto iterator = generator.begin(true); + EXPECT_EQ(iterator, generator.end()); + + EXPECT_TRUE(flag); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { + auto timedGenerator = []() -> generator { + while (true) { +#ifndef _QLEVER_NO_TIMING_TESTS + std::this_thread::sleep_for(2ms); +#endif + co_yield 0; + } + }(); + + uint32_t callCounter = 0; + + CacheableGenerator generator{std::move(timedGenerator)}; + + 
generator.setOnNextChunkComputed([&](auto duration) { +#ifndef _QLEVER_NO_TIMING_TESTS + using ::testing::AllOf; + using ::testing::Le; + using ::testing::Ge; + EXPECT_THAT(duration, AllOf(Le(3ms), Ge(1ms))); +#endif + ++callCounter; + }); + + { + auto masterIterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(masterIterator, generator.end()); + + ++masterIterator; + + EXPECT_EQ(callCounter, 2); + ASSERT_NE(masterIterator, generator.end()); + } + + { + auto slaveIterator1 = generator.begin(); + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator1, generator.end()); + + auto slaveIterator2 = generator.begin(); + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator2, generator.end()); + + ++slaveIterator2; + + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator2, generator.end()); + + ++slaveIterator2; + + EXPECT_EQ(callCounter, 3); + ASSERT_NE(slaveIterator2, generator.end()); + + ++slaveIterator1; + + EXPECT_EQ(callCounter, 3); + ASSERT_NE(slaveIterator1, generator.end()); + + ++slaveIterator1; + + EXPECT_EQ(callCounter, 3); + ASSERT_NE(slaveIterator1, generator.end()); + + ++slaveIterator1; + + EXPECT_EQ(callCounter, 4); + ASSERT_NE(slaveIterator1, generator.end()); + } + + auto slaveIterator3 = generator.begin(); + EXPECT_EQ(callCounter, 4); + ASSERT_NE(slaveIterator3, generator.end()); + + ++slaveIterator3; + + EXPECT_EQ(callCounter, 4); + ASSERT_NE(slaveIterator3, generator.end()); + + ++slaveIterator3; + + EXPECT_EQ(callCounter, 4); + ASSERT_NE(slaveIterator3, generator.end()); + + ++slaveIterator3; + + EXPECT_EQ(callCounter, 4); + ASSERT_NE(slaveIterator3, generator.end()); + + ++slaveIterator3; + + EXPECT_EQ(callCounter, 5); + ASSERT_NE(slaveIterator3, generator.end()); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { + CacheableGenerator generator{testGenerator(3)}; + uint32_t callCounter = 0; + 
generator.setOnSizeChanged([&](bool canShrink) { + ++callCounter; + return canShrink && callCounter > 2; + }); + + auto iterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(iterator, generator.end()); + + auto slaveIterator1 = generator.begin(); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(slaveIterator1, generator.end()); + EXPECT_EQ(*slaveIterator1, 0); + + ++iterator; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(iterator, generator.end()); + + ++slaveIterator1; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator1, generator.end()); + EXPECT_EQ(*slaveIterator1, 1); + + auto slaveIterator2 = generator.begin(); + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator2, generator.end()); + EXPECT_EQ(*slaveIterator2, 0); + + ++iterator; + EXPECT_EQ(callCounter, 5); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(*iterator, 2); + + ++iterator; + EXPECT_EQ(callCounter, 5); + EXPECT_EQ(iterator, generator.end()); + + ++slaveIterator1; + ASSERT_NE(slaveIterator1, generator.end()); + EXPECT_EQ(*slaveIterator1, 2); + + EXPECT_THROW(++slaveIterator2, ad_utility::IteratorExpired); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { + CacheableGenerator generator{testGenerator(3)}; + uint32_t callCounter = 0; + generator.setOnSizeChanged([&](bool canShrink) { + ++callCounter; + return canShrink && callCounter > 2; + }); + + auto iterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(iterator, generator.end()); + + auto slaveIterator = generator.begin(); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(slaveIterator, generator.end()); + + ++iterator; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(iterator, generator.end()); + + ++slaveIterator; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(slaveIterator, generator.end()); + + ++iterator; + EXPECT_EQ(callCounter, 5); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(*iterator, 2); + + ++iterator; + 
EXPECT_EQ(callCounter, 5); + EXPECT_EQ(iterator, generator.end()); + + ++slaveIterator; + ASSERT_NE(slaveIterator, generator.end()); + EXPECT_EQ(*slaveIterator, 2); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyShrinkStopsShrinking) { + CacheableGenerator generator{testGenerator(3)}; + uint32_t callCounter = 0; + generator.setOnSizeChanged([&](bool canShrink) { + ++callCounter; + return canShrink && callCounter == 3; + }); + + auto iterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(iterator, generator.end()); + + auto slaveIterator = generator.begin(); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(slaveIterator, generator.end()); + + ++iterator; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(iterator, generator.end()); + + ++iterator; + EXPECT_EQ(callCounter, 4); + ASSERT_NE(iterator, generator.end()); + + ++iterator; + EXPECT_EQ(callCounter, 4); + EXPECT_EQ(iterator, generator.end()); + + ++slaveIterator; + ASSERT_NE(slaveIterator, generator.end()); + EXPECT_EQ(*slaveIterator, 1); + + ++slaveIterator; + ASSERT_NE(slaveIterator, generator.end()); + EXPECT_EQ(*slaveIterator, 2); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, testForEachCachedValueIteratesCorrectly) { + CacheableGenerator generator{testGenerator(3)}; + uint32_t callCounter = 0; + generator.setOnSizeChanged([&](bool canShrink) { + ++callCounter; + return canShrink && callCounter > 2; + }); + + auto iterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(iterator, generator.end()); + + uint32_t timesCalledFirst = 0; + generator.forEachCachedValue([&](int value) { + EXPECT_EQ(timesCalledFirst, value); + ++timesCalledFirst; + }); + EXPECT_EQ(timesCalledFirst, 1); + + ++iterator; + EXPECT_EQ(callCounter, 2); + ASSERT_NE(iterator, generator.end()); + + uint32_t timesCalledSecond = 0; + generator.forEachCachedValue([&](int value) { + 
EXPECT_EQ(timesCalledSecond, value); + ++timesCalledSecond; + }); + EXPECT_EQ(timesCalledSecond, 2); + + ++iterator; + EXPECT_EQ(callCounter, 5); + ASSERT_NE(iterator, generator.end()); + + uint32_t timesCalledThird = 0; + generator.forEachCachedValue([&](int value) { + EXPECT_EQ(timesCalledThird + 2, value); + ++timesCalledThird; + }); + EXPECT_EQ(timesCalledThird, 1); + + ++iterator; + EXPECT_EQ(callCounter, 5); + EXPECT_EQ(iterator, generator.end()); + + uint32_t timesCalledFourth = 0; + generator.forEachCachedValue([&](int value) { + EXPECT_EQ(timesCalledFourth + 2, value); + ++timesCalledFourth; + }); + EXPECT_EQ(timesCalledFourth, 1); +} From 15b435ec488c365956eec888209d7e07070db6b3 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 30 Jun 2024 00:18:56 +0200 Subject: [PATCH 060/133] Add Filter tests --- src/engine/Operation.cpp | 2 + test/CMakeLists.txt | 2 + test/FilterTest.cpp | 152 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 test/FilterTest.cpp diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index deffcd1ed8..41b6bedf6c 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -210,6 +210,8 @@ std::shared_ptr Operation::getResult( bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; + // TODO fix case where non-lazy request fetches cached lazy result + // and doesn't aggregate as this might break operations. auto result = pinResult ? 
cache.computeOncePinned(cacheKey, cacheSetup, onlyReadFromCache) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 57145f5a18..a83d8b0cb2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -397,3 +397,5 @@ addLinkAndDiscoverTest(FsstCompressorTest fsst) addLinkAndDiscoverTest(IteratorWrapperTest) addLinkAndDiscoverTest(CacheableGeneratorTest) + +addLinkAndDiscoverTest(FilterTest) diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp new file mode 100644 index 0000000000..6f4e6827f4 --- /dev/null +++ b/test/FilterTest.cpp @@ -0,0 +1,152 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#include + +#include "engine/Filter.h" +#include "engine/ValuesForTesting.h" +#include "engine/sparqlExpressions/LiteralExpression.h" +#include "util/IndexTestHelpers.h" + +using ::testing::ElementsAre; + +class LazyValueOperation : public Operation { + public: + std::vector getChildren() override { return {}; } + string getCacheKeyImpl() const override { return "Cache Key"; } + string getDescriptor() const override { return "Descriptor"; } + size_t getResultWidth() const override { return 0; } + size_t getCostEstimate() override { return 0; } + uint64_t getSizeEstimateBeforeLimit() override { return 0; } + float getMultiplicity(size_t) override { return 1; } + bool knownEmptyResult() override { return false; } + [[nodiscard]] vector resultSortedOn() const override { + return {}; + } + VariableToColumnMap computeVariableToColumnMap() const override { + return {{Variable{"?x"}, + ColumnIndexAndTypeInfo{ + 0, ColumnIndexAndTypeInfo::UndefStatus::AlwaysDefined}}}; + } + + std::vector idTables_; + + explicit LazyValueOperation(QueryExecutionContext* qec, + std::vector idTables) + : Operation{qec}, idTables_{std::move(idTables)} { + AD_CONTRACT_CHECK(!idTables_.empty()); + } + + Result computeResult(bool requestLaziness) override { + if (requestLaziness) { + std::vector clones; + 
clones.reserve(idTables_.size()); + for (const IdTable& idTable : idTables_) { + clones.push_back(idTable.clone()); + } + auto generator = [](auto idTables) -> cppcoro::generator { + for (IdTable& idTable : idTables) { + co_yield std::move(idTable); + } + }(std::move(clones)); + return {std::move(generator), resultSortedOn(), LocalVocab{}}; + } + IdTable aggregateTable{idTables_.at(0).numColumns(), + idTables_.at(0).getAllocator()}; + for (const IdTable& idTable : idTables_) { + aggregateTable.insertAtEnd(idTable); + } + return {std::move(aggregateTable), resultSortedOn(), LocalVocab{}}; + } +}; + +IdTable makeIdTable(std::initializer_list bools) { + IdTable idTable{1, ad_utility::makeUnlimitedAllocator()}; + for (bool b : bools) { + idTable.push_back({Id::makeFromBool(b)}); + } + return idTable; +} + +columnBasedIdTable::Row makeRow(bool b) { + columnBasedIdTable::Row row{1}; + row[0] = Id::makeFromBool(b); + return row; +} + +// _____________________________________________________________________________ +TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { + QueryExecutionContext* qec = ad_utility::testing::getQec(); + qec->getQueryTreeCache(); + std::vector idTables; + idTables.push_back(makeIdTable({true, true, false, false, true})); + idTables.push_back(makeIdTable({true, false})); + idTables.push_back(makeIdTable({})); + idTables.push_back(makeIdTable({false, false, false})); + idTables.push_back(makeIdTable({true})); + + LazyValueOperation values{qec, std::move(idTables)}; + QueryExecutionTree subTree{ + qec, std::make_shared(std::move(values))}; + Filter filter{ + qec, + std::make_shared(std::move(subTree)), + {std::make_unique(Variable{"?x"}), + "Expression ?x"}}; + + auto result = filter.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_FALSE(result->isDataEvaluated()); + auto generator = result->idTables(); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_THAT(*iterator, + 
ElementsAre(makeRow(true), makeRow(true), makeRow(true))); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_THAT(*iterator, ElementsAre(makeRow(true))); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_THAT(*iterator, ElementsAre()); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_THAT(*iterator, ElementsAre()); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_THAT(*iterator, ElementsAre(makeRow(true))); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { + QueryExecutionContext* qec = ad_utility::testing::getQec(); + qec->getQueryTreeCache(); + std::vector idTables; + idTables.push_back(makeIdTable({true, true, false, false, true})); + idTables.push_back(makeIdTable({true, false})); + idTables.push_back(makeIdTable({})); + idTables.push_back(makeIdTable({false, false, false})); + idTables.push_back(makeIdTable({true})); + + LazyValueOperation values{qec, std::move(idTables)}; + QueryExecutionTree subTree{ + qec, std::make_shared(std::move(values))}; + Filter filter{ + qec, + std::make_shared(std::move(subTree)), + {std::make_unique(Variable{"?x"}), + "Expression ?x"}}; + + auto result = filter.getResult(false, ComputationMode::FULLY_MATERIALIZED); + ASSERT_TRUE(result->isDataEvaluated()); + EXPECT_THAT(result->idTable(), + ElementsAre(makeRow(true), makeRow(true), makeRow(true), + makeRow(true), makeRow(true))); +} From 633bf0628d285f3eed05d46da5f92b9ba8bff9e2 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 30 Jun 2024 01:04:25 +0200 Subject: [PATCH 061/133] Clear Cache before running tests --- test/FilterTest.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 6f4e6827f4..61fe3a0a64 100644 --- 
a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -14,7 +14,6 @@ using ::testing::ElementsAre; class LazyValueOperation : public Operation { public: std::vector getChildren() override { return {}; } - string getCacheKeyImpl() const override { return "Cache Key"; } string getDescriptor() const override { return "Descriptor"; } size_t getResultWidth() const override { return 0; } size_t getCostEstimate() override { return 0; } @@ -38,6 +37,20 @@ class LazyValueOperation : public Operation { AD_CONTRACT_CHECK(!idTables_.empty()); } + string getCacheKeyImpl() const override { + std::ostringstream stream; + for (const IdTable& idTable : idTables_) { + for (const auto& row : idTable) { + stream << "{ "; + for (const auto& cell : row) { + stream << cell << ' '; + } + stream << "}\n"; + } + } + return std::move(stream).str(); + } + Result computeResult(bool requestLaziness) override { if (requestLaziness) { std::vector clones; @@ -78,7 +91,7 @@ columnBasedIdTable::Row makeRow(bool b) { // _____________________________________________________________________________ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { QueryExecutionContext* qec = ad_utility::testing::getQec(); - qec->getQueryTreeCache(); + qec->getQueryTreeCache().clearAll(); std::vector idTables; idTables.push_back(makeIdTable({true, true, false, false, true})); idTables.push_back(makeIdTable({true, false})); @@ -127,7 +140,7 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { // _____________________________________________________________________________ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { QueryExecutionContext* qec = ad_utility::testing::getQec(); - qec->getQueryTreeCache(); + qec->getQueryTreeCache().clearAll(); std::vector idTables; idTables.push_back(makeIdTable({true, true, false, false, true})); idTables.push_back(makeIdTable({true, false})); From 6d5a95e20a7a640990658c85b39a3fa5b52c4282 Mon Sep 17 00:00:00 2001 From: RobinTF 
<83676088+RobinTF@users.noreply.github.com> Date: Sun, 30 Jun 2024 02:00:08 +0200 Subject: [PATCH 062/133] Add test to fix coverage --- test/CacheableGeneratorTest.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index df78d5afa5..0230c30f75 100644 --- a/test/CacheableGeneratorTest.cpp +++ b/test/CacheableGeneratorTest.cpp @@ -510,6 +510,32 @@ TEST(CacheableGenerator, verifyShrinkStopsShrinking) { EXPECT_EQ(*slaveIterator, 2); } +TEST(CacheableGenerator, verifySlavesCantBlockMasterIterator) { + CacheableGenerator generator{testGenerator(3)}; + generator.setOnSizeChanged(std::identity{}); + + auto masterIterator = generator.begin(true); + ASSERT_NE(masterIterator, generator.end()); + EXPECT_EQ(*masterIterator, 0); + + auto slaveIterator = generator.begin(false); + ASSERT_NE(slaveIterator, generator.end()); + EXPECT_EQ(*slaveIterator, 0); + + ++masterIterator; + ASSERT_NE(masterIterator, generator.end()); + EXPECT_EQ(*masterIterator, 1); + + ++masterIterator; + ASSERT_NE(masterIterator, generator.end()); + EXPECT_EQ(*masterIterator, 2); + + EXPECT_THROW(*slaveIterator, ad_utility::IteratorExpired); + + ++masterIterator; + EXPECT_EQ(masterIterator, generator.end()); +} + // _____________________________________________________________________________ TEST(CacheableGenerator, testForEachCachedValueIteratesCorrectly) { CacheableGenerator generator{testGenerator(3)}; From 55b4fecbc4e21f8a902816f4e9ce72e48df48d1b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 30 Jun 2024 16:20:40 +0200 Subject: [PATCH 063/133] Address some sonarcloud issues --- src/engine/Operation.cpp | 20 +++++----- src/engine/QueryExecutionContext.h | 24 ++++++++---- src/engine/QueryExecutionTree.cpp | 2 +- src/engine/Result.cpp | 6 +-- src/engine/Result.h | 2 +- src/util/CacheableGenerator.h | 59 +++++++++++++----------------- 6 files changed, 57 
insertions(+), 56 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 41b6bedf6c..fda9b86dac 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -195,7 +195,7 @@ std::shared_ptr Operation::getResult( [&cache, cacheKey](bool isComplete) mutable { if (isComplete) { cache.transformValue(cacheKey, [](const CacheValue& oldValue) { - return CacheValue{oldValue.resultTable()->aggregateTable(), + return CacheValue{oldValue.resultTable().aggregateTable(), oldValue.runtimeInfo()}; }); } @@ -223,30 +223,30 @@ std::shared_ptr Operation::getResult( } updateRuntimeInformationOnSuccess( - result, result._resultPointer->resultTable()->isDataEvaluated() + result, result._resultPointer->resultTable().isDataEvaluated() ? timer.msecs() : result._resultPointer->runtimeInfo().totalTime_); - if (result._resultPointer->resultTable()->isDataEvaluated()) { + if (result._resultPointer->resultTable().isDataEvaluated()) { auto resultNumRows = - result._resultPointer->resultTable()->idTable().size(); + result._resultPointer->resultTable().idTable().size(); auto resultNumCols = - result._resultPointer->resultTable()->idTable().numColumns(); + result._resultPointer->resultTable().idTable().numColumns(); LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " << resultNumCols << std::endl; } - if (result._resultPointer->resultTable()->isDataEvaluated()) { - return result._resultPointer->resultTable(); + if (result._resultPointer->resultTable().isDataEvaluated()) { + return result._resultPointer->resultTablePtr(); } else if (actuallyComputed) { return std::make_shared( Result::createResultAsMasterConsumer( - result._resultPointer->resultTable(), + result._resultPointer->resultTablePtr(), isRoot ? 
std::function{[this]() { signalQueryUpdate(); }} : std::function{})); } return std::make_shared(Result::createResultWithFallback( - result._resultPointer->resultTable(), std::move(computeLambda), + result._resultPointer->resultTablePtr(), std::move(computeLambda), [this, isRoot](auto duration) { runtimeInfo().totalTime_ += duration; if (isRoot) { @@ -342,7 +342,7 @@ void Operation::updateRuntimeInformationOnSuccess( const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { updateRuntimeInformationOnSuccess( - *resultAndCacheStatus._resultPointer->resultTable(), + resultAndCacheStatus._resultPointer->resultTable(), resultAndCacheStatus._cacheStatus, duration, resultAndCacheStatus._resultPointer->runtimeInfo()); } diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 746014deb9..ff7a28915c 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -22,26 +22,36 @@ class CacheValue { private: - std::shared_ptr _resultTable; + std::shared_ptr _resultTable; RuntimeInformation _runtimeInfo; public: explicit CacheValue(Result resultTable, RuntimeInformation runtimeInfo) - : _resultTable(std::make_shared(std::move(resultTable))), + : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} - const std::shared_ptr& resultTable() const { + const Result& resultTable() const noexcept { return *_resultTable; } + + std::shared_ptr resultTablePtr() const noexcept { return _resultTable; } - const RuntimeInformation& runtimeInfo() const { return _runtimeInfo; } + const RuntimeInformation& runtimeInfo() const noexcept { + return _runtimeInfo; + } ~CacheValue() { if (!_resultTable->isDataEvaluated()) { // Clear listeners - const_cast(*_resultTable).setOnSizeChanged({}); - const_cast(*_resultTable).setOnGeneratorFinished({}); - const_cast(*_resultTable).setOnNextChunkComputed({}); + try { + _resultTable->setOnSizeChanged({}); + 
_resultTable->setOnGeneratorFinished({}); + _resultTable->setOnNextChunkComputed({}); + } catch (...) { + // Should never happen. The listeners only throw assertion errors + // if the result is evaluated. + std::exit(1); + } } } diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 2f7dd762ce..1fd420c339 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -119,7 +119,7 @@ void QueryExecutionTree::readFromCache() { auto& cache = qec_->getQueryTreeCache(); auto res = cache.getIfContained(getCacheKey()); if (res.has_value()) { - auto resultTable = res->_resultPointer->resultTable(); + auto resultTable = res->_resultPointer->resultTablePtr(); if (resultTable->isDataEvaluated()) { cachedResult_ = std::move(resultTable); } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 679b3fd8fa..ace3d13045 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -233,8 +233,8 @@ const IdTable& Result::idTable() const { cppcoro::generator Result::idTables() const { AD_CONTRACT_CHECK(!isDataEvaluated()); return std::visit( - [](auto& generator) -> cppcoro::generator { - if constexpr (!std::is_same_v) { + [](T& generator) -> cppcoro::generator { + if constexpr (!std::is_same_v) { for (auto&& idTable : generator) { co_yield idTable; } @@ -247,7 +247,7 @@ cppcoro::generator Result::idTables() const { } // _____________________________________________________________________________ -bool Result::isDataEvaluated() const { +bool Result::isDataEvaluated() const noexcept { return std::holds_alternative(data_); } diff --git a/src/engine/Result.h b/src/engine/Result.h index 8922c1c0b3..d82009ebbe 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -158,7 +158,7 @@ class Result { // (which is not possible with `shareLocalVocabFrom`). LocalVocab getCopyOfLocalVocab() const; - bool isDataEvaluated() const; + bool isDataEvaluated() const noexcept; // Log the size of this result. 
We call this at several places in // `Server::processQuery`. Ideally, this should only be called in one diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index c47ed28bb4..70f6abcb7f 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -17,7 +17,7 @@ namespace ad_utility { -class IteratorExpired : std::exception {}; +class IteratorExpired : public std::exception {}; template class CacheableGenerator { @@ -71,15 +71,13 @@ class CacheableGenerator { generatorIterator_.value() == generator_.end()) { return; } - if (masterState_ == MasterIteratorState::MASTER_STARTED) { - if (!isMaster) { - conditionVariable_.wait(lock, [this, index]() { - return (generatorIterator_.has_value() && - generatorIterator_.value() == generator_.end()) || - index < cachedValues_.size(); - }); - return; - } + if (masterState_ == MasterIteratorState::MASTER_STARTED && !isMaster) { + conditionVariable_.wait(lock, [this, index]() { + return (generatorIterator_.has_value() && + generatorIterator_.value() == generator_.end()) || + index < cachedValues_.size(); + }); + return; } auto start = std::chrono::steady_clock::now(); if (generatorIterator_.has_value()) { @@ -96,10 +94,8 @@ class CacheableGenerator { } if (generatorIterator_.value() != generator_.end()) { cachedValues_.emplace_back(std::move(*generatorIterator_.value())); - if (onSizeChanged_) { - if (onSizeChanged_(true)) { - tryShrinkCache(); - } + if (onSizeChanged_ && onSizeChanged_(true)) { + tryShrinkCache(); } } else if (onGeneratorFinished_) { onGeneratorFinished_(cachedValues_.empty() || @@ -135,18 +131,19 @@ class CacheableGenerator { conditionVariable_.notify_all(); } - void setOnSizeChanged(std::function onSizeChanged) { + void setOnSizeChanged(std::function onSizeChanged) noexcept { std::lock_guard lock{mutex_}; onSizeChanged_ = std::move(onSizeChanged); } - void setOnGeneratorFinished(std::function onGeneratorFinished) { + void setOnGeneratorFinished( + std::function 
onGeneratorFinished) noexcept { std::lock_guard lock{mutex_}; onGeneratorFinished_ = std::move(onGeneratorFinished); } - void setOnNextChunkComputed( - std::function onNextChunkComputed) { + void setOnNextChunkComputed(std::function + onNextChunkComputed) noexcept { std::lock_guard lock{mutex_}; onNextChunkComputed_ = std::move(onNextChunkComputed); } @@ -185,10 +182,11 @@ class CacheableGenerator { : computationStorage_{ std::make_shared(std::move(generator))} {} - CacheableGenerator(CacheableGenerator&& other) = default; - CacheableGenerator(const CacheableGenerator& other) = delete; - CacheableGenerator& operator=(CacheableGenerator&& other) = default; - CacheableGenerator& operator=(const CacheableGenerator& other) = delete; + CacheableGenerator(CacheableGenerator&& other) noexcept = default; + CacheableGenerator(const CacheableGenerator& other) noexcept = delete; + CacheableGenerator& operator=(CacheableGenerator&& other) noexcept = default; + CacheableGenerator& operator=(const CacheableGenerator& other) noexcept = + delete; class IteratorSentinel {}; @@ -221,18 +219,10 @@ class CacheableGenerator { return it.storage()->isDone(it.currentIndex_); } - friend bool operator!=(const Iterator& it, IteratorSentinel s) noexcept { - return !(it == s); - } - friend bool operator==(IteratorSentinel s, const Iterator& it) noexcept { return (it == s); } - friend bool operator!=(IteratorSentinel s, const Iterator& it) noexcept { - return it != s; - } - Iterator& operator++() { ++currentIndex_; storage()->advanceTo(currentIndex_, isMaster_); @@ -268,16 +258,17 @@ class CacheableGenerator { computationStorage_->forEachCachedValue(function); } - void setOnSizeChanged(std::function onSizeChanged) { + void setOnSizeChanged(std::function onSizeChanged) noexcept { computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); } - void setOnGeneratorFinished(std::function onGeneratorFinished) { + void setOnGeneratorFinished( + std::function onGeneratorFinished) noexcept { 
computationStorage_->setOnGeneratorFinished(std::move(onGeneratorFinished)); } - void setOnNextChunkComputed( - std::function onNextChunkComputed) { + void setOnNextChunkComputed(std::function + onNextChunkComputed) noexcept { computationStorage_->setOnNextChunkComputed(std::move(onNextChunkComputed)); } }; From e5ceacc2517a5c7476c8076b7dcd595806c68cf6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 30 Jun 2024 20:19:33 +0200 Subject: [PATCH 064/133] Add tests for ExportQueryExecutionTrees --- src/engine/ExportQueryExecutionTrees.cpp | 16 +- src/engine/ExportQueryExecutionTrees.h | 24 ++ test/CMakeLists.txt | 2 +- ....cpp => ExportQueryExecutionTreesTest.cpp} | 289 +++++++++++++++++- 4 files changed, 306 insertions(+), 25 deletions(-) rename test/{ExportQueryExecutionTreeTest.cpp => ExportQueryExecutionTreesTest.cpp} (77%) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index f92be076f2..fe58b3204a 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -13,14 +13,9 @@ #include "util/http/MediaTypes.h" // __________________________________________________________________________ -namespace { -struct IndexWithTable { - size_t index_; - const IdTable& idTable_; -}; - -cppcoro::generator getIdTables(const Result& result) { +cppcoro::generator ExportQueryExecutionTrees::getIdTables( + const Result& result) { if (result.isDataEvaluated()) { co_yield result.idTable(); } else { @@ -29,11 +24,13 @@ cppcoro::generator getIdTables(const Result& result) { } } } + // Return a range that contains the indices of the rows that have to be exported // from the `idTable` given the `LimitOffsetClause`. 
It takes into account the // LIMIT, the OFFSET, and the actual size of the `idTable` -cppcoro::generator getRowIndices(LimitOffsetClause limitOffset, - const Result& result) { +cppcoro::generator +ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, + const Result& result) { for (const IdTable& idTable : getIdTables(result)) { uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); @@ -47,7 +44,6 @@ cppcoro::generator getRowIndices(LimitOffsetClause limitOffset, } } } -} // namespace // _____________________________________________________________________________ cppcoro::generator diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 4e8db28e0e..a85e35e546 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -177,4 +177,28 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + + struct IndexWithTable { + size_t index_; + const IdTable& idTable_; + }; + + static cppcoro::generator getIdTables(const Result& result); + // Return a range that contains the indices of the rows that have to be + // exported from the `idTable` given the `LimitOffsetClause`. 
It takes into + // account the LIMIT, the OFFSET, and the actual size of the `idTable` + static cppcoro::generator getRowIndices( + LimitOffsetClause limitOffset, const Result& result); + + FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator); + FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator); + FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable); + FRIEND_TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped); + FRIEND_TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenLastIsSkipped); + FRIEND_TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial); + FRIEND_TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial); }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a83d8b0cb2..59ace1df30 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -302,7 +302,7 @@ addLinkAndDiscoverTestSerial(OrderByTest engine) addLinkAndDiscoverTestSerial(ValuesForTestingTest index) -addLinkAndDiscoverTestSerial(ExportQueryExecutionTreeTest index engine parser) +addLinkAndDiscoverTestSerial(ExportQueryExecutionTreesTest index engine parser) addLinkAndDiscoverTestSerial(AggregateExpressionTest parser sparqlExpressions index engine) diff --git a/test/ExportQueryExecutionTreeTest.cpp b/test/ExportQueryExecutionTreesTest.cpp similarity index 77% rename from test/ExportQueryExecutionTreeTest.cpp rename to test/ExportQueryExecutionTreesTest.cpp index 359a9407c6..b432c6b2b3 100644 --- a/test/ExportQueryExecutionTreeTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -210,7 +210,7 @@ static std::string makeXMLHeader( static const std::string xmlTrailer = "\n\n"; // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, Integers) { +TEST(ExportQueryExecutionTrees, Integers) { std::string kg = "

42 .

-42019234865781 .

4012934858173560"; std::string query = "SELECT ?o WHERE {?s ?p ?o} ORDER BY ?o"; @@ -276,7 +276,7 @@ TEST(ExportQueryExecutionTree, Integers) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, Bool) { +TEST(ExportQueryExecutionTrees, Bool) { std::string kg = "

true .

false."; std::string query = "SELECT ?o WHERE {?s ?p ?o} ORDER BY ?o"; @@ -330,7 +330,7 @@ TEST(ExportQueryExecutionTree, Bool) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, UnusedVariable) { +TEST(ExportQueryExecutionTrees, UnusedVariable) { std::string kg = "

true .

false."; std::string query = "SELECT ?o WHERE {?s ?p ?x} ORDER BY ?s"; std::string expectedXml = makeXMLHeader({"o"}) + R"( @@ -366,7 +366,7 @@ TEST(ExportQueryExecutionTree, UnusedVariable) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, Floats) { +TEST(ExportQueryExecutionTrees, Floats) { std::string kg = "

42.2 .

-42019234865.781e12 .

" "4.012934858173560e-12"; @@ -434,7 +434,7 @@ TEST(ExportQueryExecutionTree, Floats) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, Dates) { +TEST(ExportQueryExecutionTrees, Dates) { std::string kg = "

" "\"1950-01-01T00:00:00\"^^."; @@ -493,7 +493,7 @@ TEST(ExportQueryExecutionTree, Dates) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, Entities) { +TEST(ExportQueryExecutionTrees, Entities) { std::string kg = "PREFIX qlever: \n

qlever:o"; std::string query = "SELECT ?o WHERE {?s ?p ?o} ORDER BY ?o"; std::string expectedXml = makeXMLHeader({"o"}) + @@ -540,7 +540,7 @@ TEST(ExportQueryExecutionTree, Entities) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, LiteralWithLanguageTag) { +TEST(ExportQueryExecutionTrees, LiteralWithLanguageTag) { std::string kg = "

\"\"\"Some\"Where\tOver,\"\"\"@en-ca."; std::string query = "SELECT ?o WHERE {?s ?p ?o} ORDER BY ?o"; std::string expectedXml = makeXMLHeader({"o"}) + @@ -589,7 +589,7 @@ TEST(ExportQueryExecutionTree, LiteralWithLanguageTag) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, LiteralWithDatatype) { +TEST(ExportQueryExecutionTrees, LiteralWithDatatype) { std::string kg = "

\"something\"^^"; std::string query = "SELECT ?o WHERE {?s ?p ?o} ORDER BY ?o"; std::string expectedXml = makeXMLHeader({"o"}) + @@ -637,7 +637,7 @@ TEST(ExportQueryExecutionTree, LiteralWithDatatype) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, UndefinedValues) { +TEST(ExportQueryExecutionTrees, UndefinedValues) { std::string kg = "

"; std::string query = "SELECT ?o WHERE {?s

OPTIONAL {?s ?o}} ORDER BY ?o"; @@ -676,7 +676,7 @@ TEST(ExportQueryExecutionTree, UndefinedValues) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, BlankNode) { +TEST(ExportQueryExecutionTrees, BlankNode) { std::string kg = "

_:blank"; std::string objectQuery = "SELECT ?o WHERE {?s ?p ?o } ORDER BY ?o"; std::string expectedXml = makeXMLHeader({"o"}) + @@ -702,7 +702,7 @@ TEST(ExportQueryExecutionTree, BlankNode) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, TextIndex) { +TEST(ExportQueryExecutionTrees, TextIndex) { std::string kg = "

\"alpha beta\". \"alphax betax\". "; std::string objectQuery = "SELECT ?o WHERE {

?t. ?text ql:contains-entity ?t .?text " @@ -728,7 +728,7 @@ TEST(ExportQueryExecutionTree, TextIndex) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, MultipleVariables) { +TEST(ExportQueryExecutionTrees, MultipleVariables) { std::string kg = "

"; std::string objectQuery = "SELECT ?p ?o WHERE { ?p ?o } ORDER BY ?p ?o"; std::string expectedXml = makeXMLHeader({"p", "o"}) + @@ -765,7 +765,7 @@ TEST(ExportQueryExecutionTree, MultipleVariables) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, BinaryExport) { +TEST(ExportQueryExecutionTrees, BinaryExport) { std::string kg = "

31 . 42"; std::string query = "SELECT ?p ?o WHERE { ?p ?o } ORDER BY ?p ?o"; std::string result = @@ -790,7 +790,7 @@ TEST(ExportQueryExecutionTree, BinaryExport) { } // ____________________________________________________________________________ -TEST(ExportQueryExecutionTree, CornerCases) { +TEST(ExportQueryExecutionTrees, CornerCases) { std::string kg = "

"; std::string query = "SELECT ?p ?o WHERE { ?p ?o } ORDER BY ?p ?o"; std::string constructQuery = @@ -907,3 +907,264 @@ INSTANTIATE_TEST_SUITE_P(StreamableMediaTypes, StreamableMediaTypesFixture, // TODO Unit tests that also test for the export of text records from // the text index and thus systematically fill the coverage gaps. + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { + IdTable idTable{1, ad_utility::makeUnlimitedAllocator()}; + idTable.push_back({Id::makeFromInt(42)}); + idTable.push_back({Id::makeFromInt(1337)}); + + Result result{std::move(idTable), {}, LocalVocab{}}; + auto generator = ExportQueryExecutionTrees::getIdTables(result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + ASSERT_EQ(iterator->size(), 2); + EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(42)); + EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(1337)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + + IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; + idTable2.push_back({Id::makeFromInt(42)}); + idTable2.push_back({Id::makeFromInt(1337)}); + + co_yield std::move(idTable2); + }(); + + Result result = Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getIdTables(result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + ASSERT_EQ(iterator->size(), 3); + 
EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(1)); + EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(2)); + EXPECT_EQ(iterator->at(2)[0], Id::makeFromInt(3)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + ASSERT_EQ(iterator->size(), 2); + EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(42)); + EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(1337)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + }(); + + Result result = Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 1, ._offset = 1}, result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + + IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; + idTable2.push_back({Id::makeFromInt(4)}); + idTable2.push_back({Id::makeFromInt(5)}); + + co_yield std::move(idTable2); + }(); + + Result result 
= Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(5)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenLastIsSkipped) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + + IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; + idTable2.push_back({Id::makeFromInt(4)}); + idTable2.push_back({Id::makeFromInt(5)}); + + co_yield std::move(idTable2); + }(); + + Result result = Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 3}, result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(1)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// 
_____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + + IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; + idTable2.push_back({Id::makeFromInt(4)}); + idTable2.push_back({Id::makeFromInt(5)}); + + co_yield std::move(idTable2); + }(); + + Result result = Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 3, ._offset = 1}, result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, + ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial) { + auto tableGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + idTable1.push_back({Id::makeFromInt(2)}); + idTable1.push_back({Id::makeFromInt(3)}); + + co_yield std::move(idTable1); + + IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; + idTable2.push_back({Id::makeFromInt(4)}); + 
idTable2.push_back({Id::makeFromInt(5)}); + + co_yield std::move(idTable2); + + IdTable idTable3{1, ad_utility::makeUnlimitedAllocator()}; + idTable3.push_back({Id::makeFromInt(6)}); + idTable3.push_back({Id::makeFromInt(7)}); + idTable3.push_back({Id::makeFromInt(8)}); + idTable3.push_back({Id::makeFromInt(9)}); + + co_yield std::move(idTable3); + }(); + + Result result = Result::createResultAsMasterConsumer( + std::make_shared( + Result{std::move(tableGenerator), {}, LocalVocab{}}), + []() {}); + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 5, ._offset = 2}, result); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(5)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(6)); + + ++iterator; + ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(7)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); +} From d172dc8a2fef855adf193d6373349a31e39d4735 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 20:36:43 +0200 Subject: [PATCH 065/133] Divide Result class into 3 dedicated classes --- src/engine/Bind.cpp | 2 +- src/engine/Bind.h | 2 +- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/CartesianProductJoin.h | 2 +- src/engine/CountAvailablePredicates.cpp | 2 +- src/engine/CountAvailablePredicates.h | 2 +- src/engine/Distinct.cpp | 2 +- src/engine/Distinct.h | 2 +- src/engine/Filter.cpp | 2 +- src/engine/Filter.h | 2 +- src/engine/GroupBy.cpp | 2 +- 
src/engine/GroupBy.h | 2 +- src/engine/HasPredicateScan.cpp | 4 +- src/engine/HasPredicateScan.h | 6 +- src/engine/IndexScan.cpp | 2 +- src/engine/IndexScan.h | 2 +- src/engine/Join.cpp | 2 +- src/engine/Join.h | 2 +- src/engine/Minus.cpp | 2 +- src/engine/Minus.h | 2 +- src/engine/MultiColumnJoin.cpp | 2 +- src/engine/MultiColumnJoin.h | 2 +- src/engine/NeutralElementOperation.h | 2 +- src/engine/Operation.cpp | 27 +- src/engine/Operation.h | 6 +- src/engine/OptionalJoin.cpp | 2 +- src/engine/OptionalJoin.h | 2 +- src/engine/OrderBy.cpp | 2 +- src/engine/OrderBy.h | 2 +- src/engine/QueryExecutionContext.h | 11 +- src/engine/QueryExecutionTree.cpp | 4 +- src/engine/Result.cpp | 403 ++++++++++++------------ src/engine/Result.h | 237 +++++++++----- src/engine/Service.cpp | 2 +- src/engine/Service.h | 2 +- src/engine/Sort.cpp | 2 +- src/engine/Sort.h | 2 +- src/engine/TextIndexScanForEntity.cpp | 2 +- src/engine/TextIndexScanForEntity.h | 2 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/engine/TextIndexScanForWord.h | 2 +- src/engine/TextLimit.cpp | 2 +- src/engine/TextLimit.h | 2 +- src/engine/TransitivePathImpl.h | 2 +- src/engine/Union.cpp | 6 +- src/engine/Union.h | 2 +- src/engine/Values.cpp | 2 +- src/engine/Values.h | 2 +- src/util/CacheableGenerator.h | 9 - test/CMakeLists.txt | 22 +- test/CacheableGeneratorTest.cpp | 19 -- test/ExportQueryExecutionTreesTest.cpp | 28 +- test/FilterTest.cpp | 4 +- test/OperationTest.cpp | 9 +- test/SparqlDataTypesTest.cpp | 10 +- test/engine/TextIndexScanTestHelpers.h | 6 +- test/engine/ValuesForTesting.h | 2 +- test/util/OperationTestHelpers.h | 4 +- 58 files changed, 490 insertions(+), 405 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 3f7af15c71..c4ab56bd36 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -81,7 +81,7 @@ std::vector Bind::getChildren() { } // _____________________________________________________________________________ -Result Bind::computeResult([[maybe_unused]] 
bool requestLaziness) { +ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Get input to BIND operation..." << endl; std::shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Bind.h b/src/engine/Bind.h index f4a298c214..bb1996a967 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -46,7 +46,7 @@ class Bind : public Operation { [[nodiscard]] vector resultSortedOn() const override; private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; // Implementation for the binding of arbitrary expressions. template diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 23fd0da551..7a4c016cd0 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -132,7 +132,7 @@ void CartesianProductJoin::writeResultColumn(std::span targetColumn, } } // ____________________________________________________________________________ -Result CartesianProductJoin::computeResult( +ProtoResult CartesianProductJoin::computeResult( [[maybe_unused]] bool requestLaziness) { IdTable result{getExecutionContext()->getAllocator()}; result.setNumColumns(getResultWidth()); diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 0e8fc8ea3a..f83804e91d 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -82,7 +82,7 @@ class CartesianProductJoin : public Operation { private: //! Compute the result of the query-subtree rooted at this element.. - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; // Copy each element from the `inputColumn` `groupSize` times to the // `targetColumn`. Repeat until the `targetColumn` is completely filled. 
Skip diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index eaa4ffcbac..d8b321d1a7 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -100,7 +100,7 @@ size_t CountAvailablePredicates::getCostEstimate() { } // _____________________________________________________________________________ -Result CountAvailablePredicates::computeResult( +ProtoResult CountAvailablePredicates::computeResult( [[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "CountAvailablePredicates result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h index 4565e031eb..1a804099f8 100644 --- a/src/engine/CountAvailablePredicates.h +++ b/src/engine/CountAvailablePredicates.h @@ -103,6 +103,6 @@ class CountAvailablePredicates : public Operation { void computePatternTrickAllEntities( IdTable* result, const CompactVectorOfStrings& patterns) const; - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index 8393ef04f9..0fc69cf4b4 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -38,7 +38,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -Result Distinct::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Distinct::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable{getExecutionContext()->getAllocator()}; LOG(DEBUG) << "Getting sub-result for distinct result computation..." 
<< endl; std::shared_ptr subRes = _subtree->getResult(); diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 620fea4f4b..123f5f96fe 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -55,7 +55,7 @@ class Distinct : public Operation { [[nodiscard]] string getCacheKeyImpl() const override; private: - virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 72054bacbc..601c9193e2 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -43,7 +43,7 @@ string Filter::getDescriptor() const { } // _____________________________________________________________________________ -Result Filter::computeResult(bool requestLaziness) { +ProtoResult Filter::computeResult(bool requestLaziness) { LOG(DEBUG) << "Getting sub-result for Filter result computation..." << endl; std::shared_ptr subRes = _subtree->getResult(requestLaziness); LOG(DEBUG) << "Filter result computation..." 
<< endl; diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 107349eebd..6a3ef107c7 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -58,7 +58,7 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - Result computeResult(bool requestLaziness) override; + ProtoResult computeResult(bool requestLaziness) override; template IdTable computeFilterImpl( diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 9b7ca63e2a..ac4653cc3c 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -309,7 +309,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, *dynResult = std::move(result).toDynamic(); } -Result GroupBy::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult GroupBy::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "GroupBy result computation..." << std::endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index 6ec1886969..0d8f0a9ea9 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -89,7 +89,7 @@ class GroupBy : public Operation { private: VariableToColumnMap computeVariableToColumnMap() const override; - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; template void processGroup(const Aggregate& expression, diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 9c863d939d..1ce406d84c 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -254,7 +254,7 @@ size_t HasPredicateScan::getCostEstimate() { } // ___________________________________________________________________________ -Result HasPredicateScan::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult HasPredicateScan::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable{getExecutionContext()->getAllocator()}; 
idTable.setNumColumns(getResultWidth()); @@ -365,7 +365,7 @@ void HasPredicateScan::computeFullScan( // ___________________________________________________________________________ template -Result HasPredicateScan::computeSubqueryS( +ProtoResult HasPredicateScan::computeSubqueryS( IdTable* dynResult, const CompactVectorOfStrings& patterns) { auto subresult = subtree().getResult(); auto patternCol = subtreeColIdx(); diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index 8022bca543..bfc1249858 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -105,11 +105,11 @@ class HasPredicateScan : public Operation { size_t resultSize); template - Result computeSubqueryS(IdTable* result, - const CompactVectorOfStrings& patterns); + ProtoResult computeSubqueryS(IdTable* result, + const CompactVectorOfStrings& patterns); private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 9ef45afac0..003d2b73eb 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -139,7 +139,7 @@ cppcoro::generator IndexScan::scanInChunks() const { } // _____________________________________________________________________________ -Result IndexScan::computeResult(bool requestLaziness) { +ProtoResult IndexScan::computeResult(bool requestLaziness) { LOG(DEBUG) << "IndexScan result computation...\n"; if (requestLaziness) { return {scanInChunks(), resultSortedOn(), LocalVocab{}}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 515dc4feff..642eecf8f0 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -104,7 +104,7 @@ class IndexScan : public Operation { std::array getPermutedTriple() const; private: - Result computeResult(bool requestLaziness) override; + 
ProtoResult computeResult(bool requestLaziness) override; vector getChildren() override { return {}; } diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 9cf9fdc52a..87b71f5ad5 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -90,7 +90,7 @@ string Join::getCacheKeyImpl() const { string Join::getDescriptor() const { return "Join on " + _joinVar.name(); } // _____________________________________________________________________________ -Result Join::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Join::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Getting sub-results for join result computation..." << endl; size_t leftWidth = _left->getResultWidth(); size_t rightWidth = _right->getResultWidth(); diff --git a/src/engine/Join.h b/src/engine/Join.h index 13f8d5cf8c..9298df8023 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -115,7 +115,7 @@ class Join : public Operation { virtual string getCacheKeyImpl() const override; private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Minus.cpp b/src/engine/Minus.cpp index f3a3fd301b..ae3e870a1d 100644 --- a/src/engine/Minus.cpp +++ b/src/engine/Minus.cpp @@ -32,7 +32,7 @@ string Minus::getCacheKeyImpl() const { string Minus::getDescriptor() const { return "Minus"; } // _____________________________________________________________________________ -Result Minus::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Minus::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Minus result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/Minus.h b/src/engine/Minus.h index 115f4ccba4..92c4a49a2f 100644 --- a/src/engine/Minus.h +++ b/src/engine/Minus.h @@ -72,7 +72,7 @@ class Minus : public Operation { const IdTableView& a, const IdTableView& b, size_t ia, size_t ib, const vector>& matchedColumns); - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index 20e3d491ce..c2733123ac 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -60,7 +60,7 @@ string MultiColumnJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result MultiColumnJoin::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult MultiColumnJoin::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "MultiColumnJoin result computation..." 
<< endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/MultiColumnJoin.h b/src/engine/MultiColumnJoin.h index 6ed454e0ba..ff5e784718 100644 --- a/src/engine/MultiColumnJoin.h +++ b/src/engine/MultiColumnJoin.h @@ -63,7 +63,7 @@ class MultiColumnJoin : public Operation { IdTable* resultMightBeUnsorted); private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/NeutralElementOperation.h b/src/engine/NeutralElementOperation.h index f0b5cdb62e..a6d88d9b9a 100644 --- a/src/engine/NeutralElementOperation.h +++ b/src/engine/NeutralElementOperation.h @@ -40,7 +40,7 @@ class NeutralElementOperation : public Operation { }; private: - Result computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(0); idTable.resize(1); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index fda9b86dac..07bc0b97c4 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -125,7 +125,7 @@ std::shared_ptr Operation::getResult( checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); - Result result = + ProtoResult result = computeResult(computationMode == ComputationMode::LAZY_IF_SUPPORTED); actuallyComputed = true; AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY_IF_SUPPORTED || @@ -149,7 +149,7 @@ std::shared_ptr Operation::getResult( // correctly because the result was computed, so we can pass `nullopt` as // the last argument. 
if (result.isDataEvaluated()) { - updateRuntimeInformationOnSuccess(result, + updateRuntimeInformationOnSuccess(result.idTable().size(), ad_utility::CacheStatus::computed, timer.msecs(), std::nullopt); } else { @@ -180,7 +180,7 @@ std::shared_ptr Operation::getResult( }; auto cacheSetup = [this, &computeLambda, &cache, &cacheKey]() { - auto result = computeLambda(); + auto result = CacheableResult{computeLambda()}; if (!result.isDataEvaluated()) { result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable) { // TODO find out how to handle pinned entries properly. @@ -195,8 +195,9 @@ std::shared_ptr Operation::getResult( [&cache, cacheKey](bool isComplete) mutable { if (isComplete) { cache.transformValue(cacheKey, [](const CacheValue& oldValue) { - return CacheValue{oldValue.resultTable().aggregateTable(), - oldValue.runtimeInfo()}; + return CacheValue{ + CacheableResult{oldValue.resultTable().aggregateTable()}, + oldValue.runtimeInfo()}; }); } }); @@ -237,7 +238,9 @@ std::shared_ptr Operation::getResult( } if (result._resultPointer->resultTable().isDataEvaluated()) { - return result._resultPointer->resultTablePtr(); + return std::make_shared( + Result::createResultWithFullyEvaluatedIdTable( + result._resultPointer->resultTablePtr())); } else if (actuallyComputed) { return std::make_shared( Result::createResultAsMasterConsumer( @@ -299,12 +302,10 @@ std::chrono::milliseconds Operation::remainingTime() const { // _______________________________________________________________________ void Operation::updateRuntimeInformationOnSuccess( - const Result& resultTable, ad_utility::CacheStatus cacheStatus, - Milliseconds duration, std::optional runtimeInfo) { + size_t numRows, ad_utility::CacheStatus cacheStatus, Milliseconds duration, + std::optional runtimeInfo) { _runtimeInfo->totalTime_ = duration; - // TODO find a better representation for "unknown" than 0. - _runtimeInfo->numRows_ = - resultTable.isDataEvaluated() ? 
resultTable.idTable().size() : 0; + _runtimeInfo->numRows_ = numRows; _runtimeInfo->cacheStatus_ = cacheStatus; _runtimeInfo->status_ = RuntimeInformation::Status::fullyMaterialized; @@ -341,8 +342,10 @@ void Operation::updateRuntimeInformationOnSuccess( void Operation::updateRuntimeInformationOnSuccess( const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { + const auto& result = resultAndCacheStatus._resultPointer->resultTable(); updateRuntimeInformationOnSuccess( - resultAndCacheStatus._resultPointer->resultTable(), + // TODO find a better representation for "unknown" than 0. + result.isDataEvaluated() ? result.idTable().size() : 0, resultAndCacheStatus._cacheStatus, duration, resultAndCacheStatus._resultPointer->runtimeInfo()); } diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 9649dd8689..2633be2648 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -208,7 +208,7 @@ class Operation { // Direct access to the `computeResult()` method. This should be only used for // testing, otherwise the `getResult()` function should be used which also // sets the runtime info and uses the cache. - virtual Result computeResultOnlyForTesting( + virtual ProtoResult computeResultOnlyForTesting( bool requestLaziness = false) final { return computeResult(requestLaziness); } @@ -260,7 +260,7 @@ class Operation { private: //! Compute the result of the query-subtree rooted at this element.. - virtual Result computeResult(bool requestLaziness) = 0; + virtual ProtoResult computeResult(bool requestLaziness) = 0; // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. @@ -274,7 +274,7 @@ class Operation { // allowed when `cacheStatus` is `cachedPinned` or `cachedNotPinned`, // otherwise a runtime check will fail. 
virtual void updateRuntimeInformationOnSuccess( - const Result& resultTable, ad_utility::CacheStatus cacheStatus, + size_t numRows, ad_utility::CacheStatus cacheStatus, Milliseconds duration, std::optional runtimeInfo) final; diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index 0f042681cc..45c7c2d0b5 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -89,7 +89,7 @@ string OptionalJoin::getDescriptor() const { } // _____________________________________________________________________________ -Result OptionalJoin::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult OptionalJoin::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "OptionalJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index 37f712409a..91d1a502d9 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -75,7 +75,7 @@ class OptionalJoin : public Operation { private: void computeSizeEstimateAndMultiplicities(); - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index da005cf352..2f177b8997 100644 --- a/src/engine/OrderBy.cpp +++ b/src/engine/OrderBy.cpp @@ -63,7 +63,7 @@ std::string OrderBy::getDescriptor() const { } // _____________________________________________________________________________ -Result OrderBy::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult OrderBy::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Getting sub-result for OrderBy result computation..." 
<< endl; std::shared_ptr subRes = subtree_->getResult(); diff --git a/src/engine/OrderBy.h b/src/engine/OrderBy.h index 69289ae75e..a04d187ce4 100644 --- a/src/engine/OrderBy.h +++ b/src/engine/OrderBy.h @@ -78,7 +78,7 @@ class OrderBy : public Operation { } private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override { return subtree_->getVariableColumns(); diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index ff7a28915c..1e19883d18 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -22,17 +22,18 @@ class CacheValue { private: - std::shared_ptr _resultTable; + std::shared_ptr _resultTable; RuntimeInformation _runtimeInfo; public: - explicit CacheValue(Result resultTable, RuntimeInformation runtimeInfo) - : _resultTable(std::make_shared(std::move(resultTable))), + explicit CacheValue(CacheableResult resultTable, + RuntimeInformation runtimeInfo) + : _resultTable(std::make_shared(std::move(resultTable))), _runtimeInfo(std::move(runtimeInfo)) {} - const Result& resultTable() const noexcept { return *_resultTable; } + const CacheableResult& resultTable() const noexcept { return *_resultTable; } - std::shared_ptr resultTablePtr() const noexcept { + std::shared_ptr resultTablePtr() const noexcept { return _resultTable; } diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 1fd420c339..dc018c8430 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -121,7 +121,9 @@ void QueryExecutionTree::readFromCache() { if (res.has_value()) { auto resultTable = res->_resultPointer->resultTablePtr(); if (resultTable->isDataEvaluated()) { - cachedResult_ = std::move(resultTable); + cachedResult_ = std::make_shared( + Result::createResultWithFullyEvaluatedIdTable( + 
std::move(resultTable))); } } } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index ace3d13045..0fe0ba3664 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -12,94 +12,6 @@ #include "util/Log.h" #include "util/Timer.h" -// _____________________________________________________________________________ -string Result::asDebugString() const { - std::ostringstream os; - os << "First (up to) 5 rows of result with size:\n"; - for (size_t i = 0; i < std::min(5, idTable().size()); ++i) { - for (size_t j = 0; j < idTable().numColumns(); ++j) { - os << idTable()(i, j) << '\t'; - } - os << '\n'; - } - return std::move(os).str(); -} - -// _____________________________________________________________________________ -auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) - -> SharedLocalVocabWrapper { - return getMergedLocalVocab( - std::array{std::cref(result1), std::cref(result2)}); -} - -// _____________________________________________________________________________ -LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } - -// _____________________________________________________________________________ -void Result::validateIdTable(const IdTable& idTable, - const std::vector& sortedBy) { - AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy, [&idTable](size_t numCols) { - return numCols < idTable.numColumns(); - })); - - [[maybe_unused]] auto compareRowsByJoinColumns = - [&sortedBy](const auto& row1, const auto& row2) { - for (size_t col : sortedBy) { - if (row1[col] != row2[col]) { - return row1[col] < row2[col]; - } - } - return false; - }; - AD_EXPENSIVE_CHECK(std::ranges::is_sorted(idTable, compareRowsByJoinColumns)); -} - -// _____________________________________________________________________________ -Result::Result(IdTable idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : data_{std::move(idTable)}, - sortedBy_{std::move(sortedBy)}, - 
localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); - validateIdTable(std::get(data_), sortedBy_); -} - -// _____________________________________________________________________________ -Result::Result(IdTable idTable, std::vector sortedBy, - LocalVocab&& localVocab) - : Result{std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} - -// _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : data_{ad_utility::CacheableGenerator{ - [](auto idTables, - auto sortedBy) mutable -> cppcoro::generator { - for (IdTable& idTable : idTables) { - validateIdTable(idTable, sortedBy); - co_yield std::move(idTable); - } - }(std::move(idTables), sortedBy)}}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); -} - -// _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab) - : Result{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} - -// _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab) - : data_{std::move(idTables)}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab)} {} - // _____________________________________________________________________________ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { std::ranges::for_each( @@ -117,21 +29,56 @@ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { } // _____________________________________________________________________________ -void Result::applyLimitOffset( +ProtoResult::ProtoResult(IdTable 
idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : storage_{StorageType{std::move(idTable), std::move(sortedBy), + std::move(localVocab.localVocab_)}} { + AD_CONTRACT_CHECK(storage_.localVocab_ != nullptr); + validateIdTable(storage_.idTable(), storage_.sortedBy_); +} + +// _____________________________________________________________________________ +ProtoResult::ProtoResult(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab) + : ProtoResult{std::move(idTable), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} + +// _____________________________________________________________________________ +ProtoResult::ProtoResult(cppcoro::generator idTables, + std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : storage_{ + StorageType{[](auto idTables, + auto sortedBy) mutable -> cppcoro::generator { + for (IdTable& idTable : idTables) { + validateIdTable(idTable, sortedBy); + co_yield std::move(idTable); + } + }(std::move(idTables), sortedBy), + std::move(sortedBy), std::move(localVocab.localVocab_)}} { + AD_CONTRACT_CHECK(storage_.localVocab_ != nullptr); +} + +// _____________________________________________________________________________ +ProtoResult::ProtoResult(cppcoro::generator idTables, + std::vector sortedBy, + LocalVocab&& localVocab) + : ProtoResult{std::move(idTables), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} + +// _____________________________________________________________________________ +void ProtoResult::applyLimitOffset( const LimitOffsetClause& limitOffset, std::function limitTimeCallback) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
AD_CONTRACT_CHECK(limitTimeCallback); - AD_CONTRACT_CHECK( - !std::holds_alternative>(data_)); - using Gen = ad_utility::CacheableGenerator; - if (std::holds_alternative(data_)) { + if (storage_.isDataEvaluated()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - modifyIdTable(std::get(data_), limitOffset); + modifyIdTable(storage_.idTable(), limitOffset); limitTimeCallback(limitTimer.msecs()); - } else if (std::holds_alternative(data_)) { + } else { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset, std::function limitTimeCallback) @@ -157,23 +104,18 @@ void Result::applyLimitOffset( break; } } - }(std::move(std::get(data_)).extractGenerator(), limitOffset, + }(std::move(storage_.idTables()), limitOffset, std::move(limitTimeCallback)); - data_.emplace(std::move(generator)); - } else { - AD_FAIL(); + storage_.idTables() = std::move(generator); } } // _____________________________________________________________________________ -void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { - AD_CONTRACT_CHECK( - !std::holds_alternative>(data_)); - using Gen = ad_utility::CacheableGenerator; - if (std::holds_alternative(data_)) { - AD_CONTRACT_CHECK(idTable().numRows() == - limitOffset.actualSize(idTable().numRows())); - } else if (std::holds_alternative(data_)) { +void ProtoResult::enforceLimitOffset(const LimitOffsetClause& limitOffset) { + if (storage_.isDataEvaluated()) { + AD_CONTRACT_CHECK(storage_.idTable().numRows() == + limitOffset.actualSize(storage_.idTable().numRows())); + } else { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset) -> cppcoro::generator { @@ -184,20 +126,31 @@ void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { co_yield std::move(idTable); } AD_CONTRACT_CHECK(elementCount == limitOffset.actualSize(elementCount)); - }(std::move(std::get(data_)).extractGenerator(), limitOffset); - data_.emplace(std::move(generator)); - } else { 
- AD_FAIL(); + }(std::move(storage_.idTables()), limitOffset); + storage_.idTables() = std::move(generator); } } +// _____________________________________________________________ +bool ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { + AD_CONTRACT_CHECK(storage_.isDataEvaluated()); + const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn(); + return std::ranges::all_of(varColMap, [&](const auto& varAndCol) { + const auto& [columnIndex, mightContainUndef] = varAndCol.second; + bool hasUndefined = datatypesPerColumn.at(columnIndex) + .at(static_cast(Datatype::Undefined)) != 0; + return mightContainUndef == ColumnIndexAndTypeInfo::PossiblyUndefined || + !hasUndefined; + }); +} + // _____________________________________________________________________________ -auto Result::getOrComputeDatatypeCountsPerColumn() +auto ProtoResult::getOrComputeDatatypeCountsPerColumn() -> const DatatypeCountsPerColumn& { if (datatypeCountsPerColumn_.has_value()) { return datatypeCountsPerColumn_.value(); } - auto& idTable = std::get(data_); + auto& idTable = storage_.idTable(); auto& types = datatypeCountsPerColumn_.emplace(); types.resize(idTable.numColumns()); for (size_t i = 0; i < idTable.numColumns(); ++i) { @@ -210,71 +163,55 @@ auto Result::getOrComputeDatatypeCountsPerColumn() return types; } -// _____________________________________________________________ -bool Result::checkDefinedness(const VariableToColumnMap& varColMap) { - AD_CONTRACT_CHECK(isDataEvaluated()); - const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn(); - return std::ranges::all_of(varColMap, [&](const auto& varAndCol) { - const auto& [columnIndex, mightContainUndef] = varAndCol.second; - bool hasUndefined = datatypesPerColumn.at(columnIndex) - .at(static_cast(Datatype::Undefined)) != 0; - return mightContainUndef == ColumnIndexAndTypeInfo::PossiblyUndefined || - !hasUndefined; - }); -} - // 
_____________________________________________________________________________ -const IdTable& Result::idTable() const { - AD_CONTRACT_CHECK(isDataEvaluated()); - return std::get(data_); -} +void ProtoResult::validateIdTable(const IdTable& idTable, + const std::vector& sortedBy) { + AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy, [&idTable](size_t numCols) { + return numCols < idTable.numColumns(); + })); -// _____________________________________________________________________________ -cppcoro::generator Result::idTables() const { - AD_CONTRACT_CHECK(!isDataEvaluated()); - return std::visit( - [](T& generator) -> cppcoro::generator { - if constexpr (!std::is_same_v) { - for (auto&& idTable : generator) { - co_yield idTable; + [[maybe_unused]] auto compareRowsByJoinColumns = + [&sortedBy](const auto& row1, const auto& row2) { + for (size_t col : sortedBy) { + if (row1[col] != row2[col]) { + return row1[col] < row2[col]; } - } else { - // Type of variant here should never be `IdTable` - AD_FAIL(); } - }, - data_); + return false; + }; + AD_EXPENSIVE_CHECK(std::ranges::is_sorted(idTable, compareRowsByJoinColumns)); } // _____________________________________________________________________________ -bool Result::isDataEvaluated() const noexcept { - return std::holds_alternative(data_); -} +const IdTable& ProtoResult::idTable() const { return storage_.idTable(); } // _____________________________________________________________________________ -void Result::logResultSize() const { - if (isDataEvaluated()) { - LOG(INFO) << "Result has size " << idTable().size() << " x " - << idTable().numColumns() << std::endl; - } else { - LOG(INFO) << "Result has unknown size (not computed yet)" << std::endl; - } +bool ProtoResult::isDataEvaluated() const noexcept { + return storage_.isDataEvaluated(); } +// _____________________________________________________________________________ +CacheableResult::CacheableResult(ProtoResult protoResult) + : storage_{StorageType{ + 
protoResult.isDataEvaluated() + ? decltype(StorageType::data_){std::move( + protoResult.storage_.idTable())} + : decltype(StorageType::data_){ad_utility::CacheableGenerator{ + std::move(protoResult.storage_.idTables())}}, + std::move(protoResult.storage_.sortedBy_), + std::move(protoResult.storage_.localVocab_), + }} {} -ad_utility::MemorySize Result::getCurrentSize() const { +// _____________________________________________________________________________ +ad_utility::MemorySize CacheableResult::getCurrentSize() const { auto calculateSize = [](const IdTable& idTable) { return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * sizeof(Id)); }; - if (isDataEvaluated()) { + if (storage_.isDataEvaluated()) { return calculateSize(idTable()); } else { - using Gen = ad_utility::CacheableGenerator; - // This should only ever get called on the "wrapped" generator stored in the - // cache. - AD_CONTRACT_CHECK(std::holds_alternative(data_)); ad_utility::MemorySize totalMemory = 0_B; - std::get(data_).forEachCachedValue( + storage_.idTables().forEachCachedValue( [&totalMemory, &calculateSize](const IdTable& idTable) { totalMemory += calculateSize(idTable); }); @@ -283,41 +220,29 @@ ad_utility::MemorySize Result::getCurrentSize() const { } // _____________________________________________________________________________ -void Result::setOnSizeChanged(std::function onSizeChanged) { - using Gen = ad_utility::CacheableGenerator; - // This should only ever get called on the "wrapped" generator stored in the - // cache. 
- AD_CONTRACT_CHECK(std::holds_alternative(data_)); - std::get(data_).setOnSizeChanged(std::move(onSizeChanged)); +void CacheableResult::setOnSizeChanged( + std::function onSizeChanged) { + storage_.idTables().setOnSizeChanged(std::move(onSizeChanged)); } // _____________________________________________________________________________ -void Result::setOnGeneratorFinished( +void CacheableResult::setOnGeneratorFinished( std::function onGeneratorFinished) { - using Gen = ad_utility::CacheableGenerator; - // This should only ever get called on the "wrapped" generator stored in the - // cache. - AD_CONTRACT_CHECK(std::holds_alternative(data_)); - std::get(data_).setOnGeneratorFinished(std::move(onGeneratorFinished)); + storage_.idTables().setOnGeneratorFinished(std::move(onGeneratorFinished)); } // _____________________________________________________________________________ -void Result::setOnNextChunkComputed( +void CacheableResult::setOnNextChunkComputed( std::function onNextChunkComputed) { - using Gen = ad_utility::CacheableGenerator; - // This should only ever get called on the "wrapped" generator stored in the - // cache. 
- AD_CONTRACT_CHECK(std::holds_alternative(data_)); - std::get(data_).setOnNextChunkComputed(std::move(onNextChunkComputed)); + storage_.idTables().setOnNextChunkComputed(std::move(onNextChunkComputed)); } -Result Result::aggregateTable() const { - using Gen = ad_utility::CacheableGenerator; - AD_CONTRACT_CHECK(std::holds_alternative(data_)); +// _____________________________________________________________________________ +ProtoResult CacheableResult::aggregateTable() const { size_t totalRows = 0; size_t numCols = 0; std::optional allocator; - std::get(data_).forEachCachedValue( + storage_.idTables().forEachCachedValue( [&totalRows, &numCols, &allocator](const IdTable& table) { totalRows += table.numRows(); if (numCols == 0) { @@ -330,24 +255,104 @@ Result Result::aggregateTable() const { IdTable idTable{ numCols, std::move(allocator).value_or(makeAllocatorWithLimit(0_B))}; idTable.reserve(totalRows); - std::get(data_).forEachCachedValue([&idTable](const IdTable& table) { + storage_.idTables().forEachCachedValue([&idTable](const IdTable& table) { idTable.insertAtEnd(table.begin(), table.end()); }); - return Result{std::move(idTable), sortedBy_, - SharedLocalVocabWrapper{localVocab_}}; + return ProtoResult{ + std::move(idTable), storage_.sortedBy_, + ProtoResult::SharedLocalVocabWrapper{storage_.localVocab_}}; +} + +// _____________________________________________________________________________ +const IdTable& CacheableResult::idTable() const { return storage_.idTable(); } + +// _____________________________________________________________________________ +bool CacheableResult::isDataEvaluated() const noexcept { + return storage_.isDataEvaluated(); +} + +// _____________________________________________________________________________ +Result::Result(std::shared_ptr idTable, + std::vector sortedBy, LocalVocabPtr localVocab) + : storage_{StorageType{std::move(idTable), std::move(sortedBy), + std::move(localVocab)}} {} + +// 
_____________________________________________________________________________ +Result::Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocabPtr localVocab) + : storage_{StorageType{std::move(idTables), std::move(sortedBy), + std::move(localVocab)}} {} + +// _____________________________________________________________________________ +const IdTable& Result::idTable() const { return *storage_.idTable(); } + +// _____________________________________________________________________________ +cppcoro::generator& Result::idTables() const { + return storage_.idTables(); +} + +// _____________________________________________________________________________ +auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) + -> SharedLocalVocabWrapper { + return getMergedLocalVocab( + std::array{std::cref(result1), std::cref(result2)}); +} + +// _____________________________________________________________________________ +LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } + +// _____________________________________________________________________________ +bool Result::isDataEvaluated() const noexcept { + return storage_.isDataEvaluated(); +} + +// _____________________________________________________________________________ +void Result::logResultSize() const { + if (isDataEvaluated()) { + LOG(INFO) << "Result has size " << idTable().size() << " x " + << idTable().numColumns() << std::endl; + } else { + LOG(INFO) << "Result has unknown size (not computed yet)" << std::endl; + } +} + +// _____________________________________________________________________________ +string Result::asDebugString() const { + std::ostringstream os; + os << "First (up to) 5 rows of result with size:\n"; + for (size_t i = 0; i < std::min(5, idTable().size()); ++i) { + for (size_t j = 0; j < idTable().numColumns(); ++j) { + os << idTable()(i, j) << '\t'; + } + os << '\n'; + } + return std::move(os).str(); +} + +// 
_____________________________________________________________________________ +Result Result::createResultWithFullyEvaluatedIdTable( + std::shared_ptr cacheableResult) { + AD_CONTRACT_CHECK(cacheableResult->isDataEvaluated()); + auto sortedBy = cacheableResult->storage_.sortedBy_; + auto localVocab = cacheableResult->storage_.localVocab_; + const IdTable* tablePointer = &cacheableResult->idTable(); + return Result{ + std::shared_ptr{std::move(cacheableResult), tablePointer}, + std::move(sortedBy), std::move(localVocab)}; } // _____________________________________________________________________________ Result Result::createResultWithFallback( - std::shared_ptr original, std::function fallback, + std::shared_ptr original, + std::function fallback, std::function onIteration) { AD_CONTRACT_CHECK(!original->isDataEvaluated()); - auto generator = [](std::shared_ptr sharedResult, - std::function fallback, + auto generator = [](std::shared_ptr sharedResult, + std::function fallback, auto onIteration) -> cppcoro::generator { size_t index = 0; try { - for (auto&& idTable : sharedResult->idTables()) { + for (auto&& idTable : sharedResult->storage_.idTables()) { co_yield idTable; index++; } @@ -358,12 +363,12 @@ Result Result::createResultWithFallback( } catch (...) { throw; } - Result freshResult = fallback(); + ProtoResult freshResult = fallback(); // If data is evaluated this means that this process is not deterministic // or that there's a wrong callback used here. 
AD_CORRECTNESS_CHECK(!freshResult.isDataEvaluated()); auto start = std::chrono::steady_clock::now(); - for (auto&& idTable : freshResult.idTables()) { + for (auto&& idTable : freshResult.storage_.idTables()) { auto stop = std::chrono::steady_clock::now(); if (onIteration) { onIteration(std::chrono::duration_cast( @@ -384,18 +389,18 @@ Result Result::createResultWithFallback( }; return Result{ generator(original, std::move(fallback), std::move(onIteration)), - original->sortedBy_, original->localVocab_}; + original->storage_.sortedBy_, original->storage_.localVocab_}; } // _____________________________________________________________________________ Result Result::createResultAsMasterConsumer( - std::shared_ptr original, std::function onIteration) { - using Gen = ad_utility::CacheableGenerator; - AD_CONTRACT_CHECK(std::holds_alternative(original->data_)); + std::shared_ptr original, + std::function onIteration) { + AD_CONTRACT_CHECK(!original->isDataEvaluated()); auto generator = [](auto original, auto onIteration) -> cppcoro::generator { using ad_utility::IteratorWrapper; - auto& generator = std::get(original->data_); + auto& generator = original->storage_.idTables(); for (const IdTable& idTable : IteratorWrapper{generator, true}) { if (onIteration) { onIteration(); @@ -403,6 +408,8 @@ Result Result::createResultAsMasterConsumer( co_yield idTable; } }; - return Result{generator(original, std::move(onIteration)), - original->sortedBy_, original->localVocab_}; + auto sortedBy = original->storage_.sortedBy_; + auto localVocab = original->storage_.localVocab_; + return Result{generator(std::move(original), std::move(onIteration)), + std::move(sortedBy), std::move(localVocab)}; } diff --git a/src/engine/Result.h b/src/engine/Result.h index d82009ebbe..52e734d7ef 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -18,24 +18,62 @@ #include "util/CacheableGenerator.h" #include "util/MemorySize/MemorySize.h" -// The result of an `Operation`. 
This is the class QLever uses for all -// intermediate or final results when processing a SPARQL query. The actual data -// is always a table and contained in the member `idTable()`. -class Result { - private: - using Data = std::variant, - cppcoro::generator>; - // The actual entries. Since generators need to be modified - // in order to be consumed, this needs to be mutable. - mutable Data data_; +template +class ResultStorage { + friend class ProtoResult; + friend class CacheableResult; + friend class Result; + + using Data = std::variant; + // The actual entries. + Data data_; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. std::vector sortedBy_; - using LocalVocabPtr = std::shared_ptr; // The local vocabulary of the result. - LocalVocabPtr localVocab_ = std::make_shared(); + std::shared_ptr localVocab_ = + std::make_shared(); + + ResultStorage(Data data, std::vector sortedBy, + std::shared_ptr localVocab) + : data_{std::move(data)}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab)} {} + + bool isDataEvaluated() const noexcept { + return std::holds_alternative(data_); + } + + IdTableType& idTable() { + AD_CONTRACT_CHECK(isDataEvaluated()); + return std::get(data_); + } + + const IdTableType& idTable() const { + AD_CONTRACT_CHECK(isDataEvaluated()); + return std::get(data_); + } + + GeneratorType& idTables() { + AD_CONTRACT_CHECK(!isDataEvaluated()); + return std::get(data_); + } + + const GeneratorType& idTables() const { + AD_CONTRACT_CHECK(!isDataEvaluated()); + return std::get(data_); + } +}; + +class ProtoResult { + friend class CacheableResult; + friend class Result; + using StorageType = ResultStorage>; + StorageType storage_; + + using LocalVocabPtr = std::shared_ptr; // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that @@ -56,6 +94,8 @@ class Result { 
std::shared_ptr localVocab_; explicit SharedLocalVocabWrapper(LocalVocabPtr localVocab) : localVocab_{std::move(localVocab)} {} + friend ProtoResult; + friend class CacheableResult; friend class Result; public: @@ -67,37 +107,119 @@ class Result { std::make_shared(std::move(localVocab))} {} }; + public: + // Construct from the given arguments (see above) and check the following + // invariants: `localVocab` must not be `nullptr` and each entry of `sortedBy` + // must be a valid column index for the `idTable`. The invariant that the + // `idTable` is sorted by the columns specified by `sortedBy` is only checked, + // if expensive checks are enabled, for example by not defining the `NDEBUG` + // macro. + // The first overload of the constructor is for local vocabs that are shared + // with another `Result` via the `getSharedLocalVocab...` methods below. + // The second overload is for newly created local vocabularies. + ProtoResult(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab); + ProtoResult(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab); + ProtoResult(cppcoro::generator idTables, + std::vector sortedBy, + SharedLocalVocabWrapper localVocab); + ProtoResult(cppcoro::generator idTables, + std::vector sortedBy, LocalVocab&& localVocab); + + public: + ProtoResult(const ProtoResult& other) = delete; + ProtoResult& operator=(const ProtoResult& other) = delete; + + ProtoResult(ProtoResult&& other) = default; + ProtoResult& operator=(ProtoResult&& other) = default; + // For each column in the result (the entries in the outer `vector`) and for // each `Datatype` (the entries of the inner `array`), store the information // how many entries of that datatype are stored in the column. 
using DatatypeCountsPerColumn = std::vector< std::array(Datatype::MaxValue) + 1>>; std::optional datatypeCountsPerColumn_; - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab); + + // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. + // Note: If additional members and invariants are added to the class (for + // example information about the datatypes in each column) make sure that + // those are still correct after performing this operation. + void applyLimitOffset( + const LimitOffsetClause& limitOffset, + std::function limitTimeCallback); + + void enforceLimitOffset(const LimitOffsetClause& limitOffset); + + // Check that if the `varColMap` guarantees that a column is always defined + // (i.e. that is contains no single undefined value) that there are indeed no + // undefined values in the `data_` of this result. Return `true` iff the + // check is successful. + bool checkDefinedness(const VariableToColumnMap& varColMap); + + // Get the information, which columns stores how many entries of each + // datatype. This information is computed on the first call to this function + // `O(num-entries-in-table)` and then cached for subsequent usages. + const DatatypeCountsPerColumn& getOrComputeDatatypeCountsPerColumn(); static void validateIdTable(const IdTable& idTable, const std::vector& sortedBy); + const IdTable& idTable() const; + + bool isDataEvaluated() const noexcept; +}; + +class CacheableResult { + friend class Result; + using StorageType = + ResultStorage>; + StorageType storage_; + public: - // Construct from the given arguments (see above) and check the following - // invariants: `localVocab` must not be `nullptr` and each entry of `sortedBy` - // must be a valid column index for the `idTable`. The invariant that the - // `idTable` is sorted by the columns specified by `sortedBy` is only checked, - // if expensive checks are enabled, for example by not defining the `NDEBUG` - // macro. 
- // The first overload of the constructor is for local vocabs that are shared - // with another `Result` via the `getSharedLocalVocab...` methods below. - // The second overload is for newly created local vocabularies. - Result(IdTable idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab); - Result(IdTable idTable, std::vector sortedBy, - LocalVocab&& localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, SharedLocalVocabWrapper localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab); + CacheableResult(const CacheableResult& other) = delete; + CacheableResult& operator=(const CacheableResult& other) = delete; + + CacheableResult(CacheableResult&& other) = default; + CacheableResult& operator=(CacheableResult&& other) = default; + + explicit CacheableResult(ProtoResult protoResult); + + ad_utility::MemorySize getCurrentSize() const; + + void setOnSizeChanged(std::function onSizeChanged); + + void setOnGeneratorFinished(std::function onGeneratorFinished); + + void setOnNextChunkComputed( + std::function onNextChunkComputed); + + ProtoResult aggregateTable() const; + + const IdTable& idTable() const; + + bool isDataEvaluated() const noexcept; +}; + +// The result of an `Operation`. This is the class QLever uses for all +// intermediate or final results when processing a SPARQL query. The actual data +// is always a table and contained in the member `idTable()`. +class Result { + private: + using StorageType = ResultStorage, + cppcoro::generator>; + mutable StorageType storage_; + + using LocalVocabPtr = std::shared_ptr; + + using SharedLocalVocabWrapper = ProtoResult::SharedLocalVocabWrapper; + + Result(std::shared_ptr idTable, + std::vector sortedBy, LocalVocabPtr localVocab); + Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocabPtr localVocab); + public: // Prevent accidental copying of a result table. 
Result(const Result& other) = delete; Result& operator=(const Result& other) = delete; @@ -106,17 +228,16 @@ class Result { Result(Result&& other) = default; Result& operator=(Result&& other) = default; - // Default destructor. - virtual ~Result() = default; - // Const access to the underlying `IdTable`. const IdTable& idTable() const; // Access to the underlying `IdTable`s. - cppcoro::generator idTables() const; + cppcoro::generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. - const std::vector& sortedBy() const { return sortedBy_; } + const std::vector& sortedBy() const { + return storage_.sortedBy_; + } // Get the local vocabulary of this result, used for lookup only. // @@ -129,12 +250,12 @@ class Result { // Filter::computeFilterImpl (evaluationContext) // Variable::evaluate (idToStringAndType) // - const LocalVocab& localVocab() const { return *localVocab_; } + const LocalVocab& localVocab() const { return *storage_.localVocab_; } // Get the local vocab as a shared pointer to const. This can be used if one // result has the same local vocab as one of its child results. SharedLocalVocabWrapper getSharedLocalVocab() const { - return SharedLocalVocabWrapper{localVocab_}; + return SharedLocalVocabWrapper{storage_.localVocab_}; } // Like `getSharedLocalVocabFrom`, but takes more than one result and merges @@ -148,7 +269,7 @@ class Result { static SharedLocalVocabWrapper getMergedLocalVocab(R&& subResults) { std::vector vocabs; for (const Result& table : subResults) { - vocabs.push_back(std::to_address(table.localVocab_)); + vocabs.push_back(std::to_address(table.storage_.localVocab_)); } return SharedLocalVocabWrapper{LocalVocab::merge(vocabs)}; } @@ -169,43 +290,15 @@ class Result { // The first rows of the result and its total size (for debugging). string asDebugString() const; - // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. 
- // Note: If additional members and invariants are added to the class (for - // example information about the datatypes in each column) make sure that - // those are still correct after performing this operation. - void applyLimitOffset( - const LimitOffsetClause& limitOffset, - std::function limitTimeCallback); - - void enforceLimitOffset(const LimitOffsetClause& limitOffset); - - // Get the information, which columns stores how many entries of each - // datatype. This information is computed on the first call to this function - // `O(num-entries-in-table)` and then cached for subsequent usages. - const DatatypeCountsPerColumn& getOrComputeDatatypeCountsPerColumn(); - - // Check that if the `varColMap` guarantees that a column is always defined - // (i.e. that is contains no single undefined value) that there are indeed no - // undefined values in the `data_` of this result. Return `true` iff the - // check is successful. - bool checkDefinedness(const VariableToColumnMap& varColMap); - - ad_utility::MemorySize getCurrentSize() const; - - void setOnSizeChanged(std::function onSizeChanged); - - void setOnGeneratorFinished(std::function onGeneratorFinished); - - void setOnNextChunkComputed( - std::function onNextChunkComputed); - - Result aggregateTable() const; + static Result createResultWithFullyEvaluatedIdTable( + std::shared_ptr cacheableResult); static Result createResultWithFallback( - std::shared_ptr original, std::function fallback, + std::shared_ptr original, + std::function fallback, std::function onIteration); static Result createResultAsMasterConsumer( - std::shared_ptr original, + std::shared_ptr original, std::function onIteration); }; diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index c4252a9a8f..b6b2c47a1a 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -92,7 +92,7 @@ size_t Service::getCostEstimate() { } // ____________________________________________________________________________ -Result 
Service::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Service::computeResult([[maybe_unused]] bool requestLaziness) { // Get the URL of the SPARQL endpoint. std::string_view serviceIriString = parsedServiceClause_.serviceIri_.iri(); AD_CONTRACT_CHECK(serviceIriString.starts_with("<") && diff --git a/src/engine/Service.h b/src/engine/Service.h index 9594b66b19..427802bf3d 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -96,7 +96,7 @@ class Service : public Operation { std::string getCacheKeyImpl() const override; // Compute the result using `getTsvFunction_`. - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; // Get a VALUES clause that contains the values of the siblingTree's result. std::optional getSiblingValuesClause() const; diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 28b6be76ac..7c336186fa 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -51,7 +51,7 @@ std::string Sort::getDescriptor() const { } // _____________________________________________________________________________ -Result Sort::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Sort::computeResult([[maybe_unused]] bool requestLaziness) { using std::endl; LOG(DEBUG) << "Getting sub-result for Sort result computation..." 
<< endl; std::shared_ptr subRes = subtree_->getResult(); diff --git a/src/engine/Sort.h b/src/engine/Sort.h index d8a77c4f8d..15570a3708 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -67,7 +67,7 @@ class Sort : public Operation { } private: - virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index 80150ed29d..78c29e8734 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -14,7 +14,7 @@ TextIndexScanForEntity::TextIndexScanForEntity( word_(std::move(word)) {} // _____________________________________________________________________________ -Result TextIndexScanForEntity::computeResult( +ProtoResult TextIndexScanForEntity::computeResult( [[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getEntityMentionsForWord( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index 4679847f74..f1b11f9018 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -101,7 +101,7 @@ class TextIndexScanForEntity : public Operation { return std::get(varOrFixed_.entity_).second; } - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 5a7fa19425..6e141ad518 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -13,7 +13,7 @@ TextIndexScanForWord::TextIndexScanForWord(QueryExecutionContext* qec, isPrefix_(word_.ends_with('*')) 
{} // _____________________________________________________________________________ -Result TextIndexScanForWord::computeResult( +ProtoResult TextIndexScanForWord::computeResult( [[maybe_unused]] bool requestLaziness) { IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index 139c7d40fe..3628e27d9c 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -50,7 +50,7 @@ class TextIndexScanForWord : public Operation { private: // Returns a Result containing an IdTable with the columns being // the text variable and the completed word (if it was prefixed) - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } }; diff --git a/src/engine/TextLimit.cpp b/src/engine/TextLimit.cpp index 1988763a1a..4125f676cd 100644 --- a/src/engine/TextLimit.cpp +++ b/src/engine/TextLimit.cpp @@ -18,7 +18,7 @@ TextLimit::TextLimit(QueryExecutionContext* qec, const size_t limit, scoreColumns_(scoreColumns) {} // _____________________________________________________________________________ -Result TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { std::shared_ptr childRes = child_->getResult(); if (limit_ == 0) { diff --git a/src/engine/TextLimit.h b/src/engine/TextLimit.h index d0a60bd2d6..cbda207f5d 100644 --- a/src/engine/TextLimit.h +++ b/src/engine/TextLimit.h @@ -62,7 +62,7 @@ class TextLimit : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - Result computeResult([[maybe_unused]] bool requestLaziness) override; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return 
{child_.get()}; } }; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index ea4a7a8560..55ce45ba4d 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -142,7 +142,7 @@ class TransitivePathImpl : public TransitivePathBase { * * @return Result The result of the TransitivePath operation */ - Result computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && rhs_.isVariable()) { AD_THROW( diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 901f1df7e3..ecccefa539 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -158,7 +158,7 @@ size_t Union::getCostEstimate() { getSizeEstimateBeforeLimit(); } -Result Union::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Union::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Union result computation..." << std::endl; std::shared_ptr subRes1 = _subtrees[0]->getResult(); std::shared_ptr subRes2 = _subtrees[1]->getResult(); @@ -173,8 +173,8 @@ Result Union::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Union result computation done" << std::endl; // If only one of the two operands has a non-empty local vocabulary, share // with that one (otherwise, throws an exception). 
- return Result{std::move(idTable), resultSortedOn(), - Result::getMergedLocalVocab(*subRes1, *subRes2)}; + return {std::move(idTable), resultSortedOn(), + Result::getMergedLocalVocab(*subRes1, *subRes2)}; } void Union::computeUnion( diff --git a/src/engine/Union.h b/src/engine/Union.h index d70b715e5a..a02d204dc0 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,7 +61,7 @@ class Union : public Operation { } private: - virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Values.cpp b/src/engine/Values.cpp index 5c61b3cfe7..181f2f4207 100644 --- a/src/engine/Values.cpp +++ b/src/engine/Values.cpp @@ -108,7 +108,7 @@ void Values::computeMultiplicities() { } // ____________________________________________________________________________ -Result Values::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Values::computeResult([[maybe_unused]] bool requestLaziness) { // Set basic properties of the result table. IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/Values.h b/src/engine/Values.h index 52823bf5ea..3a73533da4 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -48,7 +48,7 @@ class Values : public Operation { public: // These two are also used by class `Service`, hence public. 
- virtual Result computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 70f6abcb7f..b16492b019 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -245,15 +245,6 @@ class CacheableGenerator { IteratorSentinel end() const noexcept { return IteratorSentinel{}; } - cppcoro::generator extractGenerator() && { - auto pointerCopy = computationStorage_; - AD_CORRECTNESS_CHECK(pointerCopy); - std::unique_lock lock{pointerCopy->mutex_}; - cppcoro::generator result{std::move(pointerCopy->generator_)}; - computationStorage_.reset(); - return result; - } - void forEachCachedValue(const std::invocable auto& function) const { computationStorage_->forEachCachedValue(function); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 59ace1df30..413ab158e5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,13 +6,13 @@ add_subdirectory(util) # Link binary ${basename} against `gmock_main`, the threading library, the # general test utilities and all libraries that are specified as additional # arguments. 
-function(linkTest basename) +function (linkTest basename) qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the executable ${basename} that is compiled from the source file # "${basename}".cpp -function(addTest basename) +function (addTest basename) add_executable(${basename} "${basename}.cpp") endfunction() @@ -43,23 +43,23 @@ if (SINGLE_TEST_BINARY) qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL TRUE) -else () +else() message(STATUS "The tests are split over multiple binaries") -endif () +endif() # Usage: `addAndLinkTest(basename, [additionalLibraries...]` # Add a GTest/GMock test case that is called `basename` and compiled from a file called # `basename.cpp`. All tests are linked against `gmock_main` and the threading library. # additional libraries against which the test case has to be linked can be specified as # additional arguments after the `basename` function(addLinkAndDiscoverTest basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else () - addTest(${basename}) - linkAndDiscoverTest(${basename} ${ARGN}) - endif () + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else () + addTest(${basename}) + linkAndDiscoverTest(${basename} ${ARGN}) + endif () endfunction() diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index 0230c30f75..e954adc3bd 100644 --- a/test/CacheableGeneratorTest.cpp +++ b/test/CacheableGeneratorTest.cpp @@ -204,25 +204,6 @@ TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { EXPECT_EQ(iterator2, generator.end()); } -// 
_____________________________________________________________________________ -TEST(CacheableGenerator, verifyExtractGeneratorGivesBackOriginal) { - CacheableGenerator generator{testGenerator(2)}; - - { - auto iterator = generator.begin(true); - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(*iterator, 0); - } - - auto innerGenerator = std::move(generator).extractGenerator(); - auto iterator = innerGenerator.begin(); - ASSERT_NE(iterator, innerGenerator.end()); - EXPECT_EQ(*iterator, 1); - - ++iterator; - EXPECT_EQ(iterator, innerGenerator.end()); -} - // _____________________________________________________________________________ TEST(CacheableGenerator, verifyOnGeneratorFinishedIsCalled) { CacheableGenerator generator{testGenerator(1)}; diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index b432c6b2b3..4f7c1a3869 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -914,7 +914,9 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { idTable.push_back({Id::makeFromInt(42)}); idTable.push_back({Id::makeFromInt(1337)}); - Result result{std::move(idTable), {}, LocalVocab{}}; + Result result = Result::createResultWithFullyEvaluatedIdTable( + std::make_shared( + ProtoResult{std::move(idTable), {}, LocalVocab{}})); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -945,8 +947,8 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); @@ -979,8 +981,8 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - 
Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1012,8 +1014,8 @@ TEST(ExportQueryExecutionTrees, }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); @@ -1049,8 +1051,8 @@ TEST(ExportQueryExecutionTrees, }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1090,8 +1092,8 @@ TEST(ExportQueryExecutionTrees, }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1139,8 +1141,8 @@ TEST(ExportQueryExecutionTrees, }(); Result result = Result::createResultAsMasterConsumer( - std::make_shared( - Result{std::move(tableGenerator), {}, LocalVocab{}}), + std::make_shared( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 61fe3a0a64..96fa82c5e5 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -51,7 +51,7 @@ class 
LazyValueOperation : public Operation { return std::move(stream).str(); } - Result computeResult(bool requestLaziness) override { + ProtoResult computeResult(bool requestLaziness) override { if (requestLaziness) { std::vector clones; clones.reserve(idTables_.size()); @@ -110,7 +110,7 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { auto result = filter.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); ASSERT_FALSE(result->isDataEvaluated()); - auto generator = result->idTables(); + auto& generator = result->idTables(); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 3d9d3d9f0e..0915a92b1a 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -58,7 +58,8 @@ TEST(OperationTest, getResultOnlyCached) { // When we now request to only return the result if it is cached, we should // get exactly the same `shared_ptr` as with the previous call. NeutralElementOperation n3{qec}; - EXPECT_EQ(n3.getResult(true, ComputationMode::ONLY_IF_CACHED), result); + EXPECT_EQ(&n3.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), + &result->idTable()); EXPECT_EQ(n3.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedNotPinned); @@ -67,7 +68,8 @@ TEST(OperationTest, getResultOnlyCached) { QueryExecutionContext qecCopy{*qec}; qecCopy._pinResult = true; NeutralElementOperation n4{&qecCopy}; - EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); + EXPECT_EQ(&n4.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), + &result->idTable()); // The cache status is `cachedNotPinned` because we found the element cached // but not pinned (it does reflect the status BEFORE the operation). @@ -79,7 +81,8 @@ TEST(OperationTest, getResultOnlyCached) { // We have pinned the result, so requesting it again should return a pinned // result. 
qecCopy._pinResult = false; - EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); + EXPECT_EQ(&n4.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), + &result->idTable()); EXPECT_EQ(n4.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedPinned); diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index a84e39f8d4..4787bd66e0 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -16,8 +16,9 @@ using enum PositionInTriple; namespace { struct ContextWrapper { Index _index{ad_utility::makeUnlimitedAllocator()}; - Result _resultTable{ - IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}; + Result _resultTable{Result::createResultWithFullyEvaluatedIdTable( + std::make_shared(ProtoResult{ + IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}))}; // TODO `VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -27,8 +28,9 @@ struct ContextWrapper { } void setIdTable(IdTable&& table) { - _resultTable = - Result{std::move(table), {}, _resultTable.getSharedLocalVocab()}; + _resultTable = Result::createResultWithFullyEvaluatedIdTable( + std::make_shared(ProtoResult{ + std::move(table), {}, _resultTable.getSharedLocalVocab()})); } }; diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index ddfa1aa5df..01ecb73eff 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -9,7 +9,7 @@ namespace textIndexScanTestHelpers { // obtain the textRecord using idToOptionalString. 
// TODO: Implement a more elegant/stable version inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, - const Result& result, + const ProtoResult& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( @@ -18,7 +18,7 @@ inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, } inline string getEntityFromResultTable(const QueryExecutionContext* qec, - const Result& result, + const ProtoResult& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( @@ -27,7 +27,7 @@ inline string getEntityFromResultTable(const QueryExecutionContext* qec, } inline string getWordFromResultTable(const QueryExecutionContext* qec, - const Result& result, + const ProtoResult& result, const size_t& rowIndex) { return qec->getIndex() .idToOptionalString( diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 9e485e9d36..2cc5217c6d 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -49,7 +49,7 @@ class ValuesForTesting : public Operation { size_t& costEstimate() { return costEstimate_; } // ___________________________________________________________________________ - Result computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { auto table = table_.clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index a3f7183230..24826902fe 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -31,7 +31,7 @@ class StallForeverOperation : public Operation { using Operation::Operation; // Do-nothing operation that runs for 100ms without computing anything, but // which can be cancelled. 
- Result computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { auto end = std::chrono::steady_clock::now() + 100ms; while (std::chrono::steady_clock::now() < end) { checkCancellation(); @@ -73,7 +73,7 @@ class ShallowParentOperation : public Operation { return {child_.get()}; } - Result computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { auto childResult = child_->getResult(); return {childResult->idTable().clone(), resultSortedOn(), childResult->getSharedLocalVocab()}; From b95edfdac24f326d60f98eab3f67206a2a042a54 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 22:12:51 +0200 Subject: [PATCH 066/133] Remove parameter for supportsLimit --- src/engine/CartesianProductJoin.cpp | 2 +- src/engine/CartesianProductJoin.h | 5 +---- src/engine/IndexScan.h | 3 +-- src/engine/Operation.cpp | 2 +- src/engine/Operation.h | 5 +---- src/engine/QueryPlanner.cpp | 3 +-- test/engine/ValuesForTesting.h | 4 +--- 7 files changed, 7 insertions(+), 17 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 7a4c016cd0..8f8147f5c2 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -150,7 +150,7 @@ ProtoResult CartesianProductJoin::computeResult( // Get all child results (possibly with limit, see above). 
for (auto& child : childView()) { - if (limitIfPresent.has_value() && child.supportsLimit(false)) { + if (limitIfPresent.has_value() && child.supportsLimit()) { child.setLimit(limitIfPresent.value()); } subResults.push_back(child.getResult()); diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index f83804e91d..96047d62fa 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -67,10 +67,7 @@ class CartesianProductJoin : public Operation { bool knownEmptyResult() override; // The Cartesian product join can efficiently evaluate a limited result. - [[nodiscard]] bool supportsLimit( - [[maybe_unused]] bool lazyResult) const override { - return true; - } + [[nodiscard]] bool supportsLimit() const override { return true; } protected: // Don't promise any sorting of the result. diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 2de30340e7..04de4c1e8d 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -92,8 +92,7 @@ class IndexScan final : public Operation { } // An index scan can directly and efficiently support LIMIT and OFFSET - [[nodiscard]] bool supportsLimit( - [[maybe_unused]] bool lazyResult) const override { + [[nodiscard]] bool supportsLimit() const override { return true; } diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 1cc3c3b17a..464a95cc1a 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -165,7 +165,7 @@ std::shared_ptr Operation::getResult( // that a lot of the time the limit is only artificially applied during // export, allowing the cache to reuse the same operation for different // limits and offsets. 
- if (!supportsLimit(!result.isDataEvaluated())) { + if (!supportsLimit()) { runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); result.applyLimitOffset(_limit, diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 2633be2648..f95310e60c 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -175,10 +175,7 @@ class Operation { // True iff this operation directly implement a `OFFSET` and `LIMIT` clause on // its result. - [[nodiscard]] virtual bool supportsLimit( - [[maybe_unused]] bool lazyResult) const { - return false; - } + [[nodiscard]] virtual bool supportsLimit() const { return false; } // Set the value of the `LIMIT` clause that will be applied to the result of // this operation. diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index bcedc150f8..a1978b3bd1 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -153,8 +153,7 @@ std::vector QueryPlanner::createExecutionTrees( vector& lastRow = plans.back(); for (auto& plan : lastRow) { - if (plan._qet->getRootOperation()->supportsLimit(false) || - plan._qet->getRootOperation()->supportsLimit(true)) { + if (plan._qet->getRootOperation()->supportsLimit()) { plan._qet->getRootOperation()->setLimit(pq._limitOffset); } } diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 2cc5217c6d..dcbf130da8 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -59,9 +59,7 @@ class ValuesForTesting : public Operation { } return {std::move(table), resultSortedOn(), localVocab_.clone()}; } - bool supportsLimit([[maybe_unused]] bool lazyResult) const override { - return supportsLimit_; - } + bool supportsLimit() const override { return supportsLimit_; } private: // ___________________________________________________________________________ From acc99c3ea3fe881f0e0c8f7141f9a797875734a7 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 
5 Jul 2024 22:26:37 +0200 Subject: [PATCH 067/133] Fix formatting --- src/engine/IndexScan.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 04de4c1e8d..04615c77d3 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -92,9 +92,7 @@ class IndexScan final : public Operation { } // An index scan can directly and efficiently support LIMIT and OFFSET - [[nodiscard]] bool supportsLimit() const override { - return true; - } + [[nodiscard]] bool supportsLimit() const override { return true; } Permutation::Enum permutation() const { return permutation_; } From 79006197e7e565cde5a5eebba582aa58877d9728 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 22:29:26 +0200 Subject: [PATCH 068/133] Format again --- src/engine/Distinct.h | 3 ++- src/engine/HasPredicateScan.cpp | 3 ++- src/engine/MultiColumnJoin.cpp | 3 ++- src/engine/Sort.h | 3 ++- src/engine/Union.h | 3 ++- src/engine/Values.h | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 123f5f96fe..eb5ca83822 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -55,7 +55,8 @@ class Distinct : public Operation { [[nodiscard]] string getCacheKeyImpl() const override; private: - virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult( + [[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 1ce406d84c..3364dc9d05 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -254,7 +254,8 @@ size_t HasPredicateScan::getCostEstimate() { } // ___________________________________________________________________________ -ProtoResult HasPredicateScan::computeResult([[maybe_unused]] 
bool requestLaziness) { +ProtoResult HasPredicateScan::computeResult( + [[maybe_unused]] bool requestLaziness) { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index c2733123ac..0c92e421c9 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -60,7 +60,8 @@ string MultiColumnJoin::getDescriptor() const { } // _____________________________________________________________________________ -ProtoResult MultiColumnJoin::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult MultiColumnJoin::computeResult( + [[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "MultiColumnJoin result computation..." << endl; IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/Sort.h b/src/engine/Sort.h index 15570a3708..d94a69c199 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -67,7 +67,8 @@ class Sort : public Operation { } private: - virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult( + [[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/Union.h b/src/engine/Union.h index a02d204dc0..00565e372e 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,7 +61,8 @@ class Union : public Operation { } private: - virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult( + [[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; }; diff --git a/src/engine/Values.h b/src/engine/Values.h index 3a73533da4..71d25b7a56 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -48,7 +48,8 @@ class Values : public Operation { public: // These two are also used by class `Service`, hence public. 
- virtual ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult( + [[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; From 0d0133a7b425dfaf46054464a63cf1b534b6c250 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 23:00:10 +0200 Subject: [PATCH 069/133] Also perform definedness check for lazy results --- src/engine/Operation.cpp | 6 +--- src/engine/Result.cpp | 69 ++++++++++++++++++++++++++-------------- src/engine/Result.h | 9 ++++-- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 464a95cc1a..e359c1a952 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -139,11 +139,7 @@ std::shared_ptr Operation::getResult( // individual results, but that requires changes in each individual // operation, therefore we currently only perform this expensive // change in the DEBUG builds. - // This check doesn't make sense when the result has not been evaluated - // yet, so it should be moved into the operations eventually. - AD_EXPENSIVE_CHECK( - !result.isDataEvaluated() || - result.checkDefinedness(getExternallyVisibleVariableColumns())); + result.checkDefinedness(getExternallyVisibleVariableColumns()); // Make sure that the results that are written to the cache have the // correct runtimeInfo. 
The children of the runtime info are already set // correctly because the result was computed, so we can pass `nullopt` as diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 0c95222d73..bddade6a8a 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -113,50 +113,71 @@ void ProtoResult::applyLimitOffset( // _____________________________________________________________________________ void ProtoResult::enforceLimitOffset(const LimitOffsetClause& limitOffset) { if (storage_.isDataEvaluated()) { - auto numRows = idTable().numRows(); + uint64_t numRows = idTable().numRows(); auto limit = limitOffset._limit; - AD_CONTRACT_CHECK(!limit.has_value() || - numRows <= static_cast(limit.value())); + AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); } else { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset) -> cppcoro::generator { auto limit = limitOffset._limit; - size_t elementCount = 0; + uint64_t elementCount = 0; for (auto&& idTable : original) { elementCount += idTable.numRows(); - AD_CONTRACT_CHECK(!limit.has_value() || - elementCount <= static_cast(limit.value())); + AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); co_yield std::move(idTable); } - AD_CONTRACT_CHECK(!limit.has_value() || - elementCount <= static_cast(limit.value())); + AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); }(std::move(storage_.idTables()), limitOffset); storage_.idTables() = std::move(generator); } } // _____________________________________________________________ -bool ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { - AD_CONTRACT_CHECK(storage_.isDataEvaluated()); - const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn(); - return std::ranges::all_of(varColMap, [&](const auto& varAndCol) { - const auto& [columnIndex, mightContainUndef] = varAndCol.second; - bool hasUndefined = datatypesPerColumn.at(columnIndex) - 
.at(static_cast(Datatype::Undefined)) != 0; - return mightContainUndef == ColumnIndexAndTypeInfo::PossiblyUndefined || - !hasUndefined; - }); +void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { + auto performCheck = + [](const auto& map, + std::optional& datatypeCountsPerColumn, + IdTable& idTable) { + const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn( + datatypeCountsPerColumn, idTable); + return std::ranges::all_of(map, [&](const auto& varAndCol) { + const auto& [columnIndex, mightContainUndef] = varAndCol.second; + bool hasUndefined = + datatypesPerColumn.at(columnIndex) + .at(static_cast(Datatype::Undefined)) != 0; + return mightContainUndef == + ColumnIndexAndTypeInfo::PossiblyUndefined || + !hasUndefined; + }); + }; + if (isDataEvaluated()) { + std::optional datatypeCountsPerColumn; + AD_EXPENSIVE_CHECK( + performCheck(varColMap, datatypeCountsPerColumn, storage_.idTable())); + } else { + auto generator = [](cppcoro::generator original, + VariableToColumnMap varColMap, + auto performCheck) -> cppcoro::generator { + std::optional datatypeCountsPerColumn; + for (auto&& idTable : original) { + AD_EXPENSIVE_CHECK( + performCheck(varColMap, datatypeCountsPerColumn, idTable)); + co_yield std::move(idTable); + } + }(std::move(storage_.idTables()), varColMap, std::move(performCheck)); + storage_.idTables() = std::move(generator); + } } // _____________________________________________________________________________ -auto ProtoResult::getOrComputeDatatypeCountsPerColumn() - -> const DatatypeCountsPerColumn& { - if (datatypeCountsPerColumn_.has_value()) { - return datatypeCountsPerColumn_.value(); +auto ProtoResult::getOrComputeDatatypeCountsPerColumn( + std::optional& datatypeCountsPerColumn, + IdTable& idTable) -> const DatatypeCountsPerColumn& { + if (datatypeCountsPerColumn.has_value()) { + return datatypeCountsPerColumn.value(); } - auto& idTable = storage_.idTable(); - auto& types = datatypeCountsPerColumn_.emplace(); + 
auto& types = datatypeCountsPerColumn.emplace(); types.resize(idTable.numColumns()); for (size_t i = 0; i < idTable.numColumns(); ++i) { const auto& col = idTable.getColumn(i); diff --git a/src/engine/Result.h b/src/engine/Result.h index 52e734d7ef..b25515b271 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -139,7 +139,6 @@ class ProtoResult { // how many entries of that datatype are stored in the column. using DatatypeCountsPerColumn = std::vector< std::array(Datatype::MaxValue) + 1>>; - std::optional datatypeCountsPerColumn_; // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. // Note: If additional members and invariants are added to the class (for @@ -155,16 +154,20 @@ class ProtoResult { // (i.e. that is contains no single undefined value) that there are indeed no // undefined values in the `data_` of this result. Return `true` iff the // check is successful. - bool checkDefinedness(const VariableToColumnMap& varColMap); + void checkDefinedness(const VariableToColumnMap& varColMap); + private: // Get the information, which columns stores how many entries of each // datatype. This information is computed on the first call to this function // `O(num-entries-in-table)` and then cached for subsequent usages. 
- const DatatypeCountsPerColumn& getOrComputeDatatypeCountsPerColumn(); + static const DatatypeCountsPerColumn& getOrComputeDatatypeCountsPerColumn( + std::optional& datatypeCountsPerColumn, + IdTable& idTable); static void validateIdTable(const IdTable& idTable, const std::vector& sortedBy); + public: const IdTable& idTable() const; bool isDataEvaluated() const noexcept; From 27ab6922ba006d99dd8be17f688e148f8dd80057 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 23:25:09 +0200 Subject: [PATCH 070/133] Drop definedness caching mechanism --- src/engine/Operation.cpp | 18 +++++++------- src/engine/Result.cpp | 52 ++++++++++++++++++---------------------- src/engine/Result.h | 6 ++--- 3 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index e359c1a952..5a50d3f7bb 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -132,14 +132,16 @@ std::shared_ptr Operation::getResult( result.isDataEvaluated()); checkCancellation(); - // Compute the datatypes that occur in each column of the result. - // Also assert, that if a column contains UNDEF values, then the - // `mightContainUndef` flag for that columns is set. - // TODO It is cheaper to move this calculation into the - // individual results, but that requires changes in each individual - // operation, therefore we currently only perform this expensive - // change in the DEBUG builds. - result.checkDefinedness(getExternallyVisibleVariableColumns()); + if constexpr (ad_utility::areExpensiveChecksEnabled) { + // Compute the datatypes that occur in each column of the result. + // Also assert, that if a column contains UNDEF values, then the + // `mightContainUndef` flag for that columns is set. 
+ // TODO It is cheaper to move this calculation into the + // individual results, but that requires changes in each individual + // operation, therefore we currently only perform this expensive + // change in the DEBUG builds. + result.checkDefinedness(getExternallyVisibleVariableColumns()); + } // Make sure that the results that are written to the cache have the // correct runtimeInfo. The children of the runtime info are already set // correctly because the result was computed, so we can pass `nullopt` as diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index bddade6a8a..8e418c2d33 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -135,34 +135,32 @@ void ProtoResult::enforceLimitOffset(const LimitOffsetClause& limitOffset) { // _____________________________________________________________ void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { - auto performCheck = - [](const auto& map, - std::optional& datatypeCountsPerColumn, - IdTable& idTable) { - const auto& datatypesPerColumn = getOrComputeDatatypeCountsPerColumn( - datatypeCountsPerColumn, idTable); - return std::ranges::all_of(map, [&](const auto& varAndCol) { - const auto& [columnIndex, mightContainUndef] = varAndCol.second; - bool hasUndefined = - datatypesPerColumn.at(columnIndex) - .at(static_cast(Datatype::Undefined)) != 0; - return mightContainUndef == - ColumnIndexAndTypeInfo::PossiblyUndefined || - !hasUndefined; - }); - }; + auto performCheck = [](const auto& map, IdTable& idTable) { + DatatypeCountsPerColumn datatypeCountsPerColumn = + computeDatatypeCountsPerColumn(idTable); + return std::ranges::all_of(map, [&](const auto& varAndCol) { + const auto& [columnIndex, mightContainUndef] = varAndCol.second; + bool hasUndefined = + datatypeCountsPerColumn.at(columnIndex) + .at(static_cast(Datatype::Undefined)) != 0; + return mightContainUndef == ColumnIndexAndTypeInfo::PossiblyUndefined || + !hasUndefined; + }); + }; if (isDataEvaluated()) { - 
std::optional datatypeCountsPerColumn; - AD_EXPENSIVE_CHECK( - performCheck(varColMap, datatypeCountsPerColumn, storage_.idTable())); + AD_EXPENSIVE_CHECK(performCheck(varColMap, storage_.idTable())); } else { auto generator = [](cppcoro::generator original, VariableToColumnMap varColMap, auto performCheck) -> cppcoro::generator { - std::optional datatypeCountsPerColumn; + bool first = true; for (auto&& idTable : original) { - AD_EXPENSIVE_CHECK( - performCheck(varColMap, datatypeCountsPerColumn, idTable)); + if (first) { + first = false; + // No need to check subsequent idTables assuming the datatypes + // don't change mid result. + AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable)); + } co_yield std::move(idTable); } }(std::move(storage_.idTables()), varColMap, std::move(performCheck)); @@ -171,13 +169,9 @@ void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { } // _____________________________________________________________________________ -auto ProtoResult::getOrComputeDatatypeCountsPerColumn( - std::optional& datatypeCountsPerColumn, - IdTable& idTable) -> const DatatypeCountsPerColumn& { - if (datatypeCountsPerColumn.has_value()) { - return datatypeCountsPerColumn.value(); - } - auto& types = datatypeCountsPerColumn.emplace(); +auto ProtoResult::computeDatatypeCountsPerColumn(IdTable& idTable) + -> DatatypeCountsPerColumn { + DatatypeCountsPerColumn types; types.resize(idTable.numColumns()); for (size_t i = 0; i < idTable.numColumns(); ++i) { const auto& col = idTable.getColumn(i); diff --git a/src/engine/Result.h b/src/engine/Result.h index b25515b271..49f6110f85 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -158,10 +158,8 @@ class ProtoResult { private: // Get the information, which columns stores how many entries of each - // datatype. This information is computed on the first call to this function - // `O(num-entries-in-table)` and then cached for subsequent usages. 
- static const DatatypeCountsPerColumn& getOrComputeDatatypeCountsPerColumn( - std::optional& datatypeCountsPerColumn, + // datatype. + static DatatypeCountsPerColumn computeDatatypeCountsPerColumn( IdTable& idTable); static void validateIdTable(const IdTable& idTable, From 2da169fe04461ea4f5927eb8115178720c1ce4fa Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 5 Jul 2024 23:52:07 +0200 Subject: [PATCH 071/133] Add comment --- src/engine/ExportQueryExecutionTrees.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index fe58b3204a..b68f59f3cc 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -664,6 +664,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( std::shared_ptr result = qet.getResult(query._limitOffset._limit.has_value()); result->logResultSize(); + // TODO this timer only makes sense for non lazy results. 
auto timeResultComputation = requestTimer.msecs(); std::optional resultSize = From 0cbb47d430dc16c3ec9f047edc98e55ef1b376fa Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 6 Jul 2024 18:07:04 +0200 Subject: [PATCH 072/133] Split lambdas into dedicated functions --- src/engine/Operation.cpp | 245 +++++++++++++++++++++------------------ src/engine/Operation.h | 11 ++ 2 files changed, 142 insertions(+), 114 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 5a50d3f7bb..538e9a4a40 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -69,6 +69,130 @@ void Operation::recursivelySetTimeConstraint( }); } +// _____________________________________________________________________________ +ProtoResult Operation::runComputation(ad_utility::Timer& timer, + ComputationMode computationMode) { + checkCancellation(); + runtimeInfo().status_ = RuntimeInformation::Status::inProgress; + signalQueryUpdate(); + ProtoResult result = + computeResult(computationMode == ComputationMode::LAZY_IF_SUPPORTED); + AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY_IF_SUPPORTED || + result.isDataEvaluated()); + + checkCancellation(); + if constexpr (ad_utility::areExpensiveChecksEnabled) { + // Compute the datatypes that occur in each column of the result. + // Also assert, that if a column contains UNDEF values, then the + // `mightContainUndef` flag for that columns is set. + // TODO It is cheaper to move this calculation into the + // individual results, but that requires changes in each individual + // operation, therefore we currently only perform this expensive + // change in the DEBUG builds. + result.checkDefinedness(getExternallyVisibleVariableColumns()); + } + // Make sure that the results that are written to the cache have the + // correct runtimeInfo. 
The children of the runtime info are already set + // correctly because the result was computed, so we can pass `nullopt` as + // the last argument. + if (result.isDataEvaluated()) { + updateRuntimeInformationOnSuccess(result.idTable().size(), + ad_utility::CacheStatus::computed, + timer.msecs(), std::nullopt); + } else { + // TODO check if this is sufficient here or we need more of + // `updateRuntimeInformationOnSuccess` functionality here. + runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; + } + // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not + // already perform it. An example for an operation that directly computes + // the Limit is a full index scan with three variables. Note that the + // `QueryPlanner` does currently only set the limit for operations that + // support it natively, except for operations in subqueries. This means + // that a lot of the time the limit is only artificially applied during + // export, allowing the cache to reuse the same operation for different + // limits and offsets. + if (!supportsLimit()) { + runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); + result.applyLimitOffset(_limit, [runtimeInfo = getRuntimeInfoPointer()]( + std::chrono::milliseconds limitTime) { + runtimeInfo->totalTime_ += limitTime; + }); + } else { + result.enforceLimitOffset(_limit); + } + return result; +} + +// _____________________________________________________________________________ +CacheValue Operation::runComputationAndTransformToCache( + ad_utility::Timer& timer, ComputationMode computationMode, + const std::string& cacheKey) { + auto& cache = _executionContext->getQueryTreeCache(); + auto result = CacheableResult{runComputation(timer, computationMode)}; + if (!result.isDataEvaluated()) { + result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable) { + // TODO find out how to handle pinned entries properly. 
+ auto sizeChange = cache.recomputeSize(cacheKey, !isShrinkable); + if (sizeChange == ad_utility::ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE) { + return isShrinkable; + } + return false; + }); + result.setOnGeneratorFinished([&cache, cacheKey](bool isComplete) mutable { + if (isComplete) { + cache.transformValue(cacheKey, [](const CacheValue& oldValue) { + return CacheValue{ + CacheableResult{oldValue.resultTable().aggregateTable()}, + oldValue.runtimeInfo()}; + }); + } + }); + result.setOnNextChunkComputed([runtimeInfo = getRuntimeInfoPointer()]( + std::chrono::milliseconds duration) { + runtimeInfo->totalTime_ += duration; + }); + } + return CacheValue{std::move(result), runtimeInfo()}; +} + +// _____________________________________________________________________________ +Result Operation::extractFromCache( + std::shared_ptr result, bool freshlyInserted, + bool isRoot, ComputationMode computationMode) { + if (result->isDataEvaluated()) { + auto resultNumRows = result->idTable().size(); + auto resultNumCols = result->idTable().numColumns(); + LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " + << resultNumCols << std::endl; + } + + // TODO fix case where non-lazy request fetches cached lazy result + // and doesn't aggregate as this might break operations. + if (result->isDataEvaluated()) { + return Result::createResultWithFullyEvaluatedIdTable(std::move(result)); + } + if (freshlyInserted) { + return Result::createResultAsMasterConsumer( + std::move(result), + isRoot ? std::function{[this]() { signalQueryUpdate(); }} + : std::function{}); + } + // TODO timer does not make sense here. 
+ ad_utility::Timer timer{ad_utility::Timer::Started}; + return Result::createResultWithFallback( + std::move(result), + [this, timer = std::move(timer), computationMode]() mutable { + return runComputation(timer, computationMode); + }, + [this, isRoot](auto duration) { + runtimeInfo().totalTime_ += duration; + if (isRoot) { + signalQueryUpdate(); + } + }); +} + // ________________________________________________________________________ std::shared_ptr Operation::getResult( bool isRoot, ComputationMode computationMode) { @@ -121,96 +245,15 @@ std::shared_ptr Operation::getResult( } }); bool actuallyComputed = false; - auto computeLambda = [this, &timer, computationMode, &actuallyComputed] { - checkCancellation(); - runtimeInfo().status_ = RuntimeInformation::Status::inProgress; - signalQueryUpdate(); - ProtoResult result = - computeResult(computationMode == ComputationMode::LAZY_IF_SUPPORTED); + auto cacheSetup = [this, &timer, computationMode, &actuallyComputed, + &cacheKey]() { actuallyComputed = true; - AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY_IF_SUPPORTED || - result.isDataEvaluated()); - - checkCancellation(); - if constexpr (ad_utility::areExpensiveChecksEnabled) { - // Compute the datatypes that occur in each column of the result. - // Also assert, that if a column contains UNDEF values, then the - // `mightContainUndef` flag for that columns is set. - // TODO It is cheaper to move this calculation into the - // individual results, but that requires changes in each individual - // operation, therefore we currently only perform this expensive - // change in the DEBUG builds. - result.checkDefinedness(getExternallyVisibleVariableColumns()); - } - // Make sure that the results that are written to the cache have the - // correct runtimeInfo. The children of the runtime info are already set - // correctly because the result was computed, so we can pass `nullopt` as - // the last argument. 
- if (result.isDataEvaluated()) { - updateRuntimeInformationOnSuccess(result.idTable().size(), - ad_utility::CacheStatus::computed, - timer.msecs(), std::nullopt); - } else { - // TODO check if this is sufficient here or we need more of - // `updateRuntimeInformationOnSuccess` functionality here. - runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; - } - // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not - // already perform it. An example for an operation that directly computes - // the Limit is a full index scan with three variables. Note that the - // `QueryPlanner` does currently only set the limit for operations that - // support it natively, except for operations in subqueries. This means - // that a lot of the time the limit is only artificially applied during - // export, allowing the cache to reuse the same operation for different - // limits and offsets. - if (!supportsLimit()) { - runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, - true); - result.applyLimitOffset(_limit, - [runtimeInfo = getRuntimeInfoPointer()]( - std::chrono::milliseconds limitTime) { - runtimeInfo->totalTime_ += limitTime; - }); - } else { - result.enforceLimitOffset(_limit); - } - return result; - }; - - auto cacheSetup = [this, &computeLambda, &cache, &cacheKey]() { - auto result = CacheableResult{computeLambda()}; - if (!result.isDataEvaluated()) { - result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable) { - // TODO find out how to handle pinned entries properly. 
- auto sizeChange = cache.recomputeSize(cacheKey, !isShrinkable); - if (sizeChange == - ad_utility::ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE) { - return isShrinkable; - } - return false; - }); - result.setOnGeneratorFinished( - [&cache, cacheKey](bool isComplete) mutable { - if (isComplete) { - cache.transformValue(cacheKey, [](const CacheValue& oldValue) { - return CacheValue{ - CacheableResult{oldValue.resultTable().aggregateTable()}, - oldValue.runtimeInfo()}; - }); - } - }); - result.setOnNextChunkComputed([runtimeInfo = getRuntimeInfoPointer()]( - std::chrono::milliseconds duration) { - runtimeInfo->totalTime_ += duration; - }); - } - return CacheValue{std::move(result), runtimeInfo()}; + return runComputationAndTransformToCache(timer, computationMode, + cacheKey); }; bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; - // TODO fix case where non-lazy request fetches cached lazy result - // and doesn't aggregate as this might break operations. auto result = pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, onlyReadFromCache) @@ -220,40 +263,14 @@ std::shared_ptr Operation::getResult( AD_CORRECTNESS_CHECK(onlyReadFromCache); return nullptr; } - updateRuntimeInformationOnSuccess( result, result._resultPointer->resultTable().isDataEvaluated() ? 
timer.msecs() : result._resultPointer->runtimeInfo().totalTime_); - if (result._resultPointer->resultTable().isDataEvaluated()) { - auto resultNumRows = - result._resultPointer->resultTable().idTable().size(); - auto resultNumCols = - result._resultPointer->resultTable().idTable().numColumns(); - LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " - << resultNumCols << std::endl; - } - - if (result._resultPointer->resultTable().isDataEvaluated()) { - return std::make_shared( - Result::createResultWithFullyEvaluatedIdTable( - result._resultPointer->resultTablePtr())); - } else if (actuallyComputed) { - return std::make_shared( - Result::createResultAsMasterConsumer( - result._resultPointer->resultTablePtr(), - isRoot ? std::function{[this]() { signalQueryUpdate(); }} - : std::function{})); - } - return std::make_shared(Result::createResultWithFallback( - result._resultPointer->resultTablePtr(), std::move(computeLambda), - [this, isRoot](auto duration) { - runtimeInfo().totalTime_ += duration; - if (isRoot) { - signalQueryUpdate(); - } - })); + return std::make_shared( + extractFromCache(result._resultPointer->resultTablePtr(), + actuallyComputed, isRoot, computationMode)); } catch (ad_utility::CancellationException& e) { e.setOperation(getDescriptor()); runtimeInfo().status_ = RuntimeInformation::Status::cancelled; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index f95310e60c..ed4ae5edfc 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -259,6 +259,17 @@ class Operation { //! Compute the result of the query-subtree rooted at this element.. 
virtual ProtoResult computeResult(bool requestLaziness) = 0; + ProtoResult runComputation(ad_utility::Timer& timer, + ComputationMode computationMode); + + CacheValue runComputationAndTransformToCache(ad_utility::Timer& timer, + ComputationMode computationMode, + const std::string& cacheKey); + + Result extractFromCache(std::shared_ptr result, + bool freshlyInserted, bool isRoot, + ComputationMode computationMode); + // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. virtual void updateRuntimeInformationOnSuccess( From 5adda07072f5d8a8b308614160f5e2277f864f7a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 6 Jul 2024 18:32:56 +0200 Subject: [PATCH 073/133] Make move/copy constructors explicit --- src/engine/QueryExecutionContext.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 1e19883d18..f9eea06029 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -22,32 +22,37 @@ class CacheValue { private: - std::shared_ptr _resultTable; - RuntimeInformation _runtimeInfo; + std::shared_ptr resultTable_; + RuntimeInformation runtimeInfo_; public: explicit CacheValue(CacheableResult resultTable, RuntimeInformation runtimeInfo) - : _resultTable(std::make_shared(std::move(resultTable))), - _runtimeInfo(std::move(runtimeInfo)) {} + : resultTable_{std::make_shared(std::move(resultTable))}, + runtimeInfo_{std::move(runtimeInfo)} {} - const CacheableResult& resultTable() const noexcept { return *_resultTable; } + CacheValue(CacheValue&&) = default; + CacheValue(const CacheValue&) = delete; + CacheValue& operator=(CacheValue&&) = default; + CacheValue& operator=(const CacheValue&) = delete; + + const CacheableResult& resultTable() const noexcept { return *resultTable_; } 
std::shared_ptr resultTablePtr() const noexcept { - return _resultTable; + return resultTable_; } const RuntimeInformation& runtimeInfo() const noexcept { - return _runtimeInfo; + return runtimeInfo_; } ~CacheValue() { - if (!_resultTable->isDataEvaluated()) { + if (resultTable_ && !resultTable_->isDataEvaluated()) { // Clear listeners try { - _resultTable->setOnSizeChanged({}); - _resultTable->setOnGeneratorFinished({}); - _resultTable->setOnNextChunkComputed({}); + resultTable_->setOnSizeChanged({}); + resultTable_->setOnGeneratorFinished({}); + resultTable_->setOnNextChunkComputed({}); } catch (...) { // Should never happen. The listeners only throw assertion errors // if the result is evaluated. @@ -59,7 +64,7 @@ class CacheValue { // Calculates the `MemorySize` taken up by an instance of `CacheValue`. struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { - if (const auto& tablePtr = cacheValue._resultTable; tablePtr) { + if (const auto& tablePtr = cacheValue.resultTable_; tablePtr) { return tablePtr->getCurrentSize(); } else { return 0_B; From e0cdf187c4fe86330977c66694b827b35cb99a0a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 6 Jul 2024 19:54:07 +0200 Subject: [PATCH 074/133] Fix undefined behaviour --- src/util/CacheableGenerator.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index b16492b019..9b361112f7 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -29,7 +29,7 @@ class CacheableGenerator { class ComputationStorage { friend CacheableGenerator; - mutable std::shared_mutex mutex_; + mutable std::recursive_mutex mutex_; std::condition_variable_any conditionVariable_; cppcoro::generator generator_; std::optional generatorIterator_{}; @@ -106,8 +106,10 @@ class CacheableGenerator { } } + // TODO return shared pointer instead of reference for thread 
+ // safety Reference getCachedValue(size_t index) const { - std::shared_lock lock{mutex_}; + std::lock_guard lock{mutex_}; if (!cachedValues_.at(index).has_value()) { throw IteratorExpired{}; } @@ -117,7 +119,7 @@ class CacheableGenerator { // Needs to be public in order to compile with gcc 11 & 12 public: bool isDone(size_t index) noexcept { - std::shared_lock lock{mutex_}; + std::lock_guard lock{mutex_}; return index >= cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } @@ -150,7 +152,7 @@ class CacheableGenerator { void forEachCachedValue( const std::invocable auto& function) const { - std::shared_lock lock{mutex_}; + std::lock_guard lock{mutex_}; for (const auto& optional : cachedValues_) { if (optional.has_value()) { function(optional.value()); From 5ad5b8a6c30b97d2f250e147c8f07b5fd043acc2 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 6 Jul 2024 22:15:18 +0200 Subject: [PATCH 075/133] Workaround segfault --- src/engine/Operation.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 538e9a4a40..6931f4529e 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -141,7 +141,14 @@ CacheValue Operation::runComputationAndTransformToCache( }); result.setOnGeneratorFinished([&cache, cacheKey](bool isComplete) mutable { if (isComplete) { - cache.transformValue(cacheKey, [](const CacheValue& oldValue) { + // Move key onto the stack, because transformValue indirectly clears + // this listener causing `cacheKey` to be erased from the heap. `cache` + // does not need to be stored on the stack because it is passed via + // reference, so the original object where `this` will be pointing to + // when calling `transformValue` will continue to exist even if this + // lambda doesn't anymore. 
+ std::string key = std::move(cacheKey); + cache.transformValue(key, [](const CacheValue& oldValue) { return CacheValue{ CacheableResult{oldValue.resultTable().aggregateTable()}, oldValue.runtimeInfo()}; From db187f0cdb64eebd7ad348be7d4169b567b3f176 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 7 Jul 2024 03:08:40 +0200 Subject: [PATCH 076/133] Try different attempt to fix double locking --- src/util/CacheableGenerator.h | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 9b361112f7..4f00015c3b 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -5,9 +5,13 @@ #ifndef CACHEABLEGENERATOR_H #define CACHEABLEGENERATOR_H +#include + +#include #include #include #include +#include #include #include "util/Exception.h" @@ -29,12 +33,13 @@ class CacheableGenerator { class ComputationStorage { friend CacheableGenerator; - mutable std::recursive_mutex mutex_; + mutable std::shared_mutex mutex_; std::condition_variable_any conditionVariable_; cppcoro::generator generator_; std::optional generatorIterator_{}; std::vector> cachedValues_{}; MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; + std::atomic currentOwningThread{}; // Returns true if cache needs to shrink, accepts a parameter that tells the // callback if we actually can shrink std::function onSizeChanged_{}; @@ -52,6 +57,9 @@ class CacheableGenerator { private: void advanceTo(size_t index, bool isMaster) { std::unique_lock lock{mutex_}; + currentOwningThread = std::this_thread::get_id(); + absl::Cleanup cleanup{ + [this]() { currentOwningThread = std::thread::id{}; }}; AD_CONTRACT_CHECK(index <= cachedValues_.size()); // Make sure master iterator does exist and we're not blocking // indefinitely @@ -109,7 +117,7 @@ class CacheableGenerator { // TODO return shared pointer instead of reference for thread // 
safety Reference getCachedValue(size_t index) const { - std::lock_guard lock{mutex_}; + std::shared_lock lock{mutex_}; if (!cachedValues_.at(index).has_value()) { throw IteratorExpired{}; } @@ -119,7 +127,7 @@ class CacheableGenerator { // Needs to be public in order to compile with gcc 11 & 12 public: bool isDone(size_t index) noexcept { - std::lock_guard lock{mutex_}; + std::shared_lock lock{mutex_}; return index >= cachedValues_.size() && generatorIterator_.has_value() && generatorIterator_.value() == generator_.end(); } @@ -134,25 +142,38 @@ class CacheableGenerator { } void setOnSizeChanged(std::function onSizeChanged) noexcept { - std::lock_guard lock{mutex_}; + std::unique_lock lock{mutex_, std::defer_lock}; + if (currentOwningThread != std::this_thread::get_id()) { + lock.lock(); + } onSizeChanged_ = std::move(onSizeChanged); } void setOnGeneratorFinished( std::function onGeneratorFinished) noexcept { - std::lock_guard lock{mutex_}; + std::unique_lock lock{mutex_, std::defer_lock}; + if (currentOwningThread != std::this_thread::get_id()) { + lock.lock(); + } onGeneratorFinished_ = std::move(onGeneratorFinished); } void setOnNextChunkComputed(std::function onNextChunkComputed) noexcept { - std::lock_guard lock{mutex_}; + std::unique_lock lock{mutex_, std::defer_lock}; + if (currentOwningThread != std::this_thread::get_id()) { + lock.lock(); + } onNextChunkComputed_ = std::move(onNextChunkComputed); } void forEachCachedValue( const std::invocable auto& function) const { - std::lock_guard lock{mutex_}; + // Don't lock again if we're calling this within a listener. 
+ std::shared_lock lock{mutex_, std::defer_lock}; + if (currentOwningThread != std::this_thread::get_id()) { + lock.lock(); + } for (const auto& optional : cachedValues_) { if (optional.has_value()) { function(optional.value()); From 4ba81bd2f110db47b2fb37cc297a27df383f3a6f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 7 Jul 2024 14:23:47 +0200 Subject: [PATCH 077/133] Avoid pseudo false-positive thread sanitizer warning --- src/engine/QueryExecutionContext.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index f9eea06029..cb811262e5 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -24,6 +24,8 @@ class CacheValue { private: std::shared_ptr resultTable_; RuntimeInformation runtimeInfo_; + std::unique_ptr newlyCreated = + std::make_unique(true); public: explicit CacheValue(CacheableResult resultTable, @@ -65,6 +67,15 @@ class CacheValue { struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { if (const auto& tablePtr = cacheValue.resultTable_; tablePtr) { + // Avoid holding lock on initial computation (where the result will be 0 + // anyways) to prevent thread sanitizer warning of potential deadlocks + // because later in the execution the cache lock is acquired after + // acquiring the lock of the cached generator, whereas here we would do + // it in the opposite order otherwise. 
+ if (cacheValue.newlyCreated->exchange(false) && + !tablePtr->isDataEvaluated()) { + return 0_B; + } return tablePtr->getCurrentSize(); } else { return 0_B; From 373f009a659eed67241b7693cdbfa52f96dcead2 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 11 Jul 2024 17:23:12 +0200 Subject: [PATCH 078/133] Restructure code to avoid class of race conditions --- src/engine/Operation.cpp | 45 ++--- src/engine/QueryExecutionContext.h | 43 +++-- src/engine/Result.cpp | 74 +++------ src/engine/Result.h | 12 +- src/util/CacheableGenerator.h | 78 ++++----- test/CacheableGeneratorTest.cpp | 259 ++++++++++++----------------- 6 files changed, 218 insertions(+), 293 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 6931f4529e..8a8d9d282b 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -129,9 +129,10 @@ CacheValue Operation::runComputationAndTransformToCache( ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey) { auto& cache = _executionContext->getQueryTreeCache(); - auto result = CacheableResult{runComputation(timer, computationMode)}; + CacheableResult result{runComputation(timer, computationMode)}; if (!result.isDataEvaluated()) { - result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable) { + result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable, bool, + std::shared_ptr) { // TODO find out how to handle pinned entries properly. auto sizeChange = cache.recomputeSize(cacheKey, !isShrinkable); if (sizeChange == ad_utility::ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE) { @@ -139,22 +140,6 @@ CacheValue Operation::runComputationAndTransformToCache( } return false; }); - result.setOnGeneratorFinished([&cache, cacheKey](bool isComplete) mutable { - if (isComplete) { - // Move key onto the stack, because transformValue indirectly clears - // this listener causing `cacheKey` to be erased from the heap. 
`cache` - // does not need to be stored on the stack because it is passed via - // reference, so the original object where `this` will be pointing to - // when calling `transformValue` will continue to exist even if this - // lambda doesn't anymore. - std::string key = std::move(cacheKey); - cache.transformValue(key, [](const CacheValue& oldValue) { - return CacheValue{ - CacheableResult{oldValue.resultTable().aggregateTable()}, - oldValue.runtimeInfo()}; - }); - } - }); result.setOnNextChunkComputed([runtimeInfo = getRuntimeInfoPointer()]( std::chrono::milliseconds duration) { runtimeInfo->totalTime_ += duration; @@ -174,8 +159,28 @@ Result Operation::extractFromCache( << resultNumCols << std::endl; } - // TODO fix case where non-lazy request fetches cached lazy result - // and doesn't aggregate as this might break operations. + // Keep backwards compatible for operations that don't support this + if (!result->isDataEvaluated() && + computationMode == ComputationMode::FULLY_MATERIALIZED) { + auto& cache = _executionContext->getQueryTreeCache(); + auto cacheKey = getCacheKey(); + try { + cache.transformValue(getCacheKey(), [](const CacheValue& oldValue) { + const auto& oldResult = oldValue.resultTable(); + return CacheValue{CacheableResult{oldResult.aggregateTable().value()}, + oldValue.runtimeInfo()}; + }); + } catch (const std::bad_optional_access&) { + ad_utility::Timer timer{ad_utility::Timer::Started}; + CacheableResult newResult{runComputation(timer, computationMode)}; + cache.transformValue( + cacheKey, [this, &newResult, &result](const CacheValue&) { + CacheValue value{std::move(newResult), runtimeInfo()}; + result = value.resultTablePtr(); + return value; + }); + } + } if (result->isDataEvaluated()) { return Result::createResultWithFullyEvaluatedIdTable(std::move(result)); } diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index cb811262e5..0bb51a7a48 100644 --- a/src/engine/QueryExecutionContext.h +++ 
b/src/engine/QueryExecutionContext.h @@ -24,14 +24,33 @@ class CacheValue { private: std::shared_ptr resultTable_; RuntimeInformation runtimeInfo_; - std::unique_ptr newlyCreated = - std::make_unique(true); + std::shared_ptr currentSize_ = + std::make_shared(0); public: explicit CacheValue(CacheableResult resultTable, RuntimeInformation runtimeInfo) : resultTable_{std::make_shared(std::move(resultTable))}, - runtimeInfo_{std::move(runtimeInfo)} {} + runtimeInfo_{std::move(runtimeInfo)} { + if (!resultTable_->isDataEvaluated()) { + auto function = resultTable_->resetOnSizeChanged(); + // We assume this value has previously been set, otherwise this can be + // simplified + AD_CONTRACT_CHECK(function); + resultTable_->setOnSizeChanged( + [function = std::move(function), currentSize = currentSize_]( + bool isShrinkable, bool entryAdded, + std::shared_ptr entry) { + ad_utility::MemorySize size = calculateSize(*entry); + if (entryAdded) { + currentSize->fetch_add(size.getBytes()); + } else { + currentSize->fetch_sub(size.getBytes()); + } + return function(isShrinkable, entryAdded, std::move(entry)); + }); + } + } CacheValue(CacheValue&&) = default; CacheValue(const CacheValue&) = delete; @@ -48,12 +67,16 @@ class CacheValue { return runtimeInfo_; } + static ad_utility::MemorySize calculateSize(const IdTable& idTable) { + return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * + sizeof(Id)); + }; + ~CacheValue() { if (resultTable_ && !resultTable_->isDataEvaluated()) { // Clear listeners try { resultTable_->setOnSizeChanged({}); - resultTable_->setOnGeneratorFinished({}); resultTable_->setOnNextChunkComputed({}); } catch (...) { // Should never happen. 
The listeners only throw assertion errors @@ -67,16 +90,10 @@ class CacheValue { struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { if (const auto& tablePtr = cacheValue.resultTable_; tablePtr) { - // Avoid holding lock on initial computation (where the result will be 0 - // anyways) to prevent thread sanitizer warning of potential deadlocks - // because later in the execution the cache lock is acquired after - // acquiring the lock of the cached generator, whereas here we would do - // it in the opposite order otherwise. - if (cacheValue.newlyCreated->exchange(false) && - !tablePtr->isDataEvaluated()) { - return 0_B; + if (tablePtr->isDataEvaluated()) { + return calculateSize(tablePtr->idTable()); } - return tablePtr->getCurrentSize(); + return ad_utility::MemorySize::bytes(cacheValue.currentSize_->load()); } else { return 0_B; } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 8e418c2d33..c70864a988 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -221,34 +221,17 @@ CacheableResult::CacheableResult(ProtoResult protoResult) std::move(protoResult.storage_.localVocab_), }} {} -// _____________________________________________________________________________ -ad_utility::MemorySize CacheableResult::getCurrentSize() const { - auto calculateSize = [](const IdTable& idTable) { - return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * - sizeof(Id)); - }; - if (storage_.isDataEvaluated()) { - return calculateSize(idTable()); - } else { - ad_utility::MemorySize totalMemory = 0_B; - storage_.idTables().forEachCachedValue( - [&totalMemory, &calculateSize](const IdTable& idTable) { - totalMemory += calculateSize(idTable); - }); - return totalMemory; - } -} - // _____________________________________________________________________________ void CacheableResult::setOnSizeChanged( - std::function onSizeChanged) { + std::function)> + onSizeChanged) { 
storage_.idTables().setOnSizeChanged(std::move(onSizeChanged)); } // _____________________________________________________________________________ -void CacheableResult::setOnGeneratorFinished( - std::function onGeneratorFinished) { - storage_.idTables().setOnGeneratorFinished(std::move(onGeneratorFinished)); +std::function)> +CacheableResult::resetOnSizeChanged() { + return storage_.idTables().resetOnSizeChanged(); } // _____________________________________________________________________________ @@ -258,29 +241,23 @@ void CacheableResult::setOnNextChunkComputed( } // _____________________________________________________________________________ -ProtoResult CacheableResult::aggregateTable() const { - size_t totalRows = 0; - size_t numCols = 0; - std::optional allocator; - storage_.idTables().forEachCachedValue( - [&totalRows, &numCols, &allocator](const IdTable& table) { - totalRows += table.numRows(); - if (numCols == 0) { - numCols = table.numColumns(); - } - if (!allocator.has_value()) { - allocator = table.getAllocator(); - } - }); - IdTable idTable{ - numCols, std::move(allocator).value_or(makeAllocatorWithLimit(0_B))}; - idTable.reserve(totalRows); - storage_.idTables().forEachCachedValue([&idTable](const IdTable& table) { - idTable.insertAtEnd(table.begin(), table.end()); - }); - return ProtoResult{ - std::move(idTable), storage_.sortedBy_, - ProtoResult::SharedLocalVocabWrapper{storage_.localVocab_}}; +std::optional CacheableResult::aggregateTable() const { + try { + std::optional clone; + for (const std::shared_ptr& table : storage_.idTables()) { + if (clone.has_value()) { + clone->insertAtEnd(table->begin(), table->end()); + } else { + clone.emplace(table->clone()); + } + } + AD_CORRECTNESS_CHECK(clone.has_value()); + return ProtoResult{ + std::move(clone).value(), storage_.sortedBy_, + ProtoResult::SharedLocalVocabWrapper{storage_.localVocab_}}; + } catch (const ad_utility::IteratorExpired&) { + return std::nullopt; + } } // 
_____________________________________________________________________________ @@ -373,7 +350,7 @@ Result Result::createResultWithFallback( size_t index = 0; try { for (auto&& idTable : sharedResult->storage_.idTables()) { - co_yield idTable; + co_yield *idTable; index++; } co_return; @@ -421,11 +398,12 @@ Result Result::createResultAsMasterConsumer( auto onIteration) -> cppcoro::generator { using ad_utility::IteratorWrapper; auto& generator = original->storage_.idTables(); - for (const IdTable& idTable : IteratorWrapper{generator, true}) { + for (std::shared_ptr idTable : + IteratorWrapper{generator, true}) { if (onIteration) { onIteration(); } - co_yield idTable; + co_yield *idTable; } }; auto sortedBy = original->storage_.sortedBy_; diff --git a/src/engine/Result.h b/src/engine/Result.h index 49f6110f85..3e6f3c7dba 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -16,7 +16,6 @@ #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" #include "util/CacheableGenerator.h" -#include "util/MemorySize/MemorySize.h" template class ResultStorage { @@ -186,16 +185,17 @@ class CacheableResult { explicit CacheableResult(ProtoResult protoResult); - ad_utility::MemorySize getCurrentSize() const; + void setOnSizeChanged( + std::function)> + onSizeChanged); - void setOnSizeChanged(std::function onSizeChanged); - - void setOnGeneratorFinished(std::function onGeneratorFinished); + std::function)> + resetOnSizeChanged(); void setOnNextChunkComputed( std::function onNextChunkComputed); - ProtoResult aggregateTable() const; + std::optional aggregateTable() const; const IdTable& idTable() const; diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 4f00015c3b..5b8d1639bf 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -26,8 +26,6 @@ class IteratorExpired : public std::exception {}; template class CacheableGenerator { using GenIterator = typename cppcoro::generator::iterator; - using Reference = const 
T&; - using Pointer = const T*; enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; @@ -37,13 +35,13 @@ class CacheableGenerator { std::condition_variable_any conditionVariable_; cppcoro::generator generator_; std::optional generatorIterator_{}; - std::vector> cachedValues_{}; + std::vector> cachedValues_{}; MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; std::atomic currentOwningThread{}; // Returns true if cache needs to shrink, accepts a parameter that tells the - // callback if we actually can shrink - std::function onSizeChanged_{}; - std::function onGeneratorFinished_{}; + // callback if we actually can shrink, if the size changed because of a + // newly generated entry and the entry about to be removed or newly added. + std::function)> onSizeChanged_{}; std::function onNextChunkComputed_{}; public: @@ -70,7 +68,7 @@ class CacheableGenerator { AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::NOT_STARTED); } if (index < cachedValues_.size()) { - if (!cachedValues_.at(index).has_value()) { + if (!cachedValues_.at(index)) { throw IteratorExpired{}; } return; @@ -101,27 +99,24 @@ class CacheableGenerator { start)); } if (generatorIterator_.value() != generator_.end()) { - cachedValues_.emplace_back(std::move(*generatorIterator_.value())); - if (onSizeChanged_ && onSizeChanged_(true)) { + auto pointer = + std::make_shared(std::move(*generatorIterator_.value())); + cachedValues_.push_back(pointer); + if (onSizeChanged_ && onSizeChanged_(true, true, std::move(pointer))) { tryShrinkCache(); } - } else if (onGeneratorFinished_) { - onGeneratorFinished_(cachedValues_.empty() || - cachedValues_.at(0).has_value()); } if (isMaster) { conditionVariable_.notify_all(); } } - // TODO return shared pointer instead of reference for thread - // safety - Reference getCachedValue(size_t index) const { + std::shared_ptr getCachedValue(size_t index) const { std::shared_lock lock{mutex_}; - if 
(!cachedValues_.at(index).has_value()) { + if (!cachedValues_.at(index)) { throw IteratorExpired{}; } - return cachedValues_.at(index).value(); + return cachedValues_.at(index); } // Needs to be public in order to compile with gcc 11 & 12 @@ -141,7 +136,9 @@ class CacheableGenerator { conditionVariable_.notify_all(); } - void setOnSizeChanged(std::function onSizeChanged) noexcept { + void setOnSizeChanged( + std::function)> + onSizeChanged) noexcept { std::unique_lock lock{mutex_, std::defer_lock}; if (currentOwningThread != std::this_thread::get_id()) { lock.lock(); @@ -149,13 +146,13 @@ class CacheableGenerator { onSizeChanged_ = std::move(onSizeChanged); } - void setOnGeneratorFinished( - std::function onGeneratorFinished) noexcept { + std::function)> + resetOnSizeChanged() noexcept { std::unique_lock lock{mutex_, std::defer_lock}; if (currentOwningThread != std::this_thread::get_id()) { lock.lock(); } - onGeneratorFinished_ = std::move(onGeneratorFinished); + return std::move(onSizeChanged_); } void setOnNextChunkComputed(std::function @@ -167,28 +164,15 @@ class CacheableGenerator { onNextChunkComputed_ = std::move(onNextChunkComputed); } - void forEachCachedValue( - const std::invocable auto& function) const { - // Don't lock again if we're calling this within a listener. 
- std::shared_lock lock{mutex_, std::defer_lock}; - if (currentOwningThread != std::this_thread::get_id()) { - lock.lock(); - } - for (const auto& optional : cachedValues_) { - if (optional.has_value()) { - function(optional.value()); - } - } - } - void tryShrinkCache() { size_t maxBound = cachedValues_.size() - 1; for (size_t i = 0; i < maxBound; i++) { - if (cachedValues_.at(i).has_value()) { - cachedValues_.at(i).reset(); + auto& pointer = cachedValues_.at(i); + if (pointer) { + auto movedPointer = std::move(pointer); if (onSizeChanged_) { bool isShrinkable = i < maxBound - 1; - if (onSizeChanged_(isShrinkable)) { + if (onSizeChanged_(isShrinkable, false, std::move(movedPointer))) { AD_CONTRACT_CHECK(isShrinkable); } else { break; @@ -255,11 +239,9 @@ class CacheableGenerator { // Need to provide post-increment operator to implement the 'Range' concept. void operator++(int) { (void)operator++(); } - Reference operator*() const { + std::shared_ptr operator*() const { return storage()->getCachedValue(currentIndex_); } - - Pointer operator->() const { return std::addressof(operator*()); } }; Iterator begin(bool isMaster = false) const { @@ -268,17 +250,15 @@ class CacheableGenerator { IteratorSentinel end() const noexcept { return IteratorSentinel{}; } - void forEachCachedValue(const std::invocable auto& function) const { - computationStorage_->forEachCachedValue(function); - } - - void setOnSizeChanged(std::function onSizeChanged) noexcept { + void setOnSizeChanged( + std::function)> + onSizeChanged) noexcept { computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); } - void setOnGeneratorFinished( - std::function onGeneratorFinished) noexcept { - computationStorage_->setOnGeneratorFinished(std::move(onGeneratorFinished)); + std::function)> + resetOnSizeChanged() { + return computationStorage_->resetOnSizeChanged(); } void setOnNextChunkComputed(std::function diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index 
e954adc3bd..feff2aa662 100644 --- a/test/CacheableGeneratorTest.cpp +++ b/test/CacheableGeneratorTest.cpp @@ -27,15 +27,15 @@ TEST(CacheableGenerator, allowsMultiConsumption) { auto iterator1 = generator.begin(true); ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(*iterator1, 0); + EXPECT_EQ(**iterator1, 0); ++iterator1; ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(*iterator1, 1); + EXPECT_EQ(**iterator1, 1); ++iterator1; ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(*iterator1, 2); + EXPECT_EQ(**iterator1, 2); ++iterator1; EXPECT_EQ(iterator1, generator.end()); @@ -43,15 +43,15 @@ TEST(CacheableGenerator, allowsMultiConsumption) { auto iterator2 = generator.begin(false); ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator2, 0); + EXPECT_EQ(**iterator2, 0); ++iterator2; ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator2, 1); + EXPECT_EQ(**iterator2, 1); ++iterator2; ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator2, 2); + EXPECT_EQ(**iterator2, 2); ++iterator2; EXPECT_EQ(iterator2, generator.end()); } @@ -80,7 +80,7 @@ TEST(CacheableGenerator, masterBlocksSlaves) { } cv.notify_all(); - EXPECT_EQ(*iterator, 0); + EXPECT_EQ(**iterator, 0); ++iterator; ASSERT_NE(iterator, generator.end()); @@ -91,7 +91,7 @@ TEST(CacheableGenerator, masterBlocksSlaves) { } cv.notify_all(); - EXPECT_EQ(*iterator, 1); + EXPECT_EQ(**iterator, 1); ++iterator; ASSERT_NE(iterator, generator.end()); @@ -102,7 +102,7 @@ TEST(CacheableGenerator, masterBlocksSlaves) { } cv.notify_all(); - EXPECT_EQ(*iterator, 2); + EXPECT_EQ(**iterator, 2); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -118,26 +118,26 @@ TEST(CacheableGenerator, masterBlocksSlaves) { ASSERT_NE(iterator, generator.end()); EXPECT_GE(counter, 0); - EXPECT_EQ(*iterator, 0); + EXPECT_EQ(**iterator, 0); ++iterator; ASSERT_NE(iterator, generator.end()); EXPECT_GE(counter, 1); - EXPECT_EQ(*iterator, 1); + EXPECT_EQ(**iterator, 1); ++iterator; ASSERT_NE(iterator, generator.end()); 
EXPECT_GE(counter, 2); - EXPECT_EQ(*iterator, 2); + EXPECT_EQ(**iterator, 2); ++iterator; EXPECT_EQ(iterator, generator.end()); EXPECT_GE(counter, 3); }}; - EXPECT_EQ(*masterIterator, 0); + EXPECT_EQ(**masterIterator, 0); { std::unique_lock guard{counterMutex}; @@ -147,7 +147,7 @@ TEST(CacheableGenerator, masterBlocksSlaves) { } ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(*masterIterator, 1); + EXPECT_EQ(**masterIterator, 1); { std::unique_lock guard{counterMutex}; cv.wait(guard, [&]() { return proceedStage == 2; }); @@ -156,7 +156,7 @@ TEST(CacheableGenerator, masterBlocksSlaves) { } ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(*masterIterator, 2); + EXPECT_EQ(**masterIterator, 2); { std::unique_lock guard{counterMutex}; cv.wait(guard, [&]() { return proceedStage == 3; }); @@ -178,24 +178,24 @@ TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { ASSERT_NE(iterator1, generator.end()); ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator1, 0); - EXPECT_EQ(*iterator2, 0); + EXPECT_EQ(**iterator1, 0); + EXPECT_EQ(**iterator2, 0); ++iterator1; ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(*iterator1, 1); + EXPECT_EQ(**iterator1, 1); ++iterator2; ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator2, 1); + EXPECT_EQ(**iterator2, 1); ++iterator2; ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(*iterator2, 2); + EXPECT_EQ(**iterator2, 2); ++iterator1; ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(*iterator1, 2); + EXPECT_EQ(**iterator1, 2); ++iterator1; EXPECT_EQ(iterator1, generator.end()); @@ -204,72 +204,6 @@ TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { EXPECT_EQ(iterator2, generator.end()); } -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyOnGeneratorFinishedIsCalled) { - CacheableGenerator generator{testGenerator(1)}; - - bool flag = false; - - generator.setOnGeneratorFinished([&](bool value) { - flag = true; - 
EXPECT_TRUE(value); - }); - auto iterator = generator.begin(true); - ASSERT_NE(iterator, generator.end()); - - EXPECT_FALSE(flag); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); - - EXPECT_TRUE(flag); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, - verifyOnGeneratorFinishedIsCalledCorrectlyWhenExpired) { - CacheableGenerator generator{testGenerator(2)}; - - bool flag = false; - - generator.setOnGeneratorFinished([&](bool value) { - flag = true; - EXPECT_FALSE(value); - }); - generator.setOnSizeChanged(std::identity{}); - - auto iterator = generator.begin(true); - ASSERT_NE(iterator, generator.end()); - - EXPECT_FALSE(flag); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - - EXPECT_FALSE(flag); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); - - EXPECT_TRUE(flag); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyOnGeneratorFinishedIsCalledWhenEmpty) { - CacheableGenerator generator{testGenerator(0)}; - - bool flag = false; - - generator.setOnGeneratorFinished([&](bool value) { - flag = true; - EXPECT_TRUE(value); - }); - auto iterator = generator.begin(true); - EXPECT_EQ(iterator, generator.end()); - - EXPECT_TRUE(flag); -} - // _____________________________________________________________________________ TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { auto timedGenerator = []() -> generator { @@ -370,10 +304,11 @@ TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged([&](bool canShrink) { - ++callCounter; - return canShrink && callCounter > 2; - }); + generator.setOnSizeChanged( + [&](bool canShrink, bool, std::shared_ptr) { + ++callCounter; + return canShrink && callCounter > 2; + }); auto iterator = 
generator.begin(true); EXPECT_EQ(callCounter, 1); @@ -382,7 +317,7 @@ TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { auto slaveIterator1 = generator.begin(); EXPECT_EQ(callCounter, 1); ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(*slaveIterator1, 0); + EXPECT_EQ(**slaveIterator1, 0); ++iterator; EXPECT_EQ(callCounter, 2); @@ -391,17 +326,17 @@ TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { ++slaveIterator1; EXPECT_EQ(callCounter, 2); ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(*slaveIterator1, 1); + EXPECT_EQ(**slaveIterator1, 1); auto slaveIterator2 = generator.begin(); EXPECT_EQ(callCounter, 2); ASSERT_NE(slaveIterator2, generator.end()); - EXPECT_EQ(*slaveIterator2, 0); + EXPECT_EQ(**slaveIterator2, 0); ++iterator; EXPECT_EQ(callCounter, 5); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(*iterator, 2); + EXPECT_EQ(**iterator, 2); ++iterator; EXPECT_EQ(callCounter, 5); @@ -409,24 +344,58 @@ TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { ++slaveIterator1; ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(*slaveIterator1, 2); + EXPECT_EQ(**slaveIterator1, 2); EXPECT_THROW(++slaveIterator2, ad_utility::IteratorExpired); } // _____________________________________________________________________________ -TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { +TEST(CacheableGenerator, verifyOnSizeChangedIsCalledWithCorrectParameters) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged([&](bool canShrink) { + generator.setOnSizeChanged([&](bool canShrink, bool wasAdded, + std::shared_ptr pointer) { + switch (callCounter) { + case 0: + case 1: + EXPECT_TRUE(wasAdded); + EXPECT_EQ(*pointer, callCounter); + break; + case 2: + EXPECT_FALSE(wasAdded); + EXPECT_EQ(*pointer, 0); + break; + default: + ADD_FAILURE() << "Invalid call count: " << callCounter; + break; + } ++callCounter; - return canShrink && 
callCounter > 2; + return canShrink && callCounter > 1; }); auto iterator = generator.begin(true); EXPECT_EQ(callCounter, 1); ASSERT_NE(iterator, generator.end()); + ++iterator; + EXPECT_EQ(callCounter, 3); + ASSERT_NE(iterator, generator.end()); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { + CacheableGenerator generator{testGenerator(3)}; + uint32_t callCounter = 0; + generator.setOnSizeChanged( + [&](bool canShrink, bool, std::shared_ptr) { + ++callCounter; + return canShrink && callCounter > 2; + }); + + auto iterator = generator.begin(true); + EXPECT_EQ(callCounter, 1); + ASSERT_NE(iterator, generator.end()); + auto slaveIterator = generator.begin(); EXPECT_EQ(callCounter, 1); ASSERT_NE(slaveIterator, generator.end()); @@ -442,7 +411,7 @@ TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { ++iterator; EXPECT_EQ(callCounter, 5); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(*iterator, 2); + EXPECT_EQ(**iterator, 2); ++iterator; EXPECT_EQ(callCounter, 5); @@ -450,17 +419,18 @@ TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { ++slaveIterator; ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(*slaveIterator, 2); + EXPECT_EQ(**slaveIterator, 2); } // _____________________________________________________________________________ TEST(CacheableGenerator, verifyShrinkStopsShrinking) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged([&](bool canShrink) { - ++callCounter; - return canShrink && callCounter == 3; - }); + generator.setOnSizeChanged( + [&](bool canShrink, bool, std::shared_ptr) { + ++callCounter; + return canShrink && callCounter == 3; + }); auto iterator = generator.begin(true); EXPECT_EQ(callCounter, 1); @@ -484,89 +454,64 @@ TEST(CacheableGenerator, verifyShrinkStopsShrinking) { ++slaveIterator; ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(*slaveIterator, 1); + 
EXPECT_EQ(**slaveIterator, 1); ++slaveIterator; ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(*slaveIterator, 2); + EXPECT_EQ(**slaveIterator, 2); } +// _____________________________________________________________________________ TEST(CacheableGenerator, verifySlavesCantBlockMasterIterator) { CacheableGenerator generator{testGenerator(3)}; - generator.setOnSizeChanged(std::identity{}); + generator.setOnSizeChanged( + [](bool canShrink, bool, std::shared_ptr) { + return canShrink; + }); auto masterIterator = generator.begin(true); ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(*masterIterator, 0); + EXPECT_EQ(**masterIterator, 0); auto slaveIterator = generator.begin(false); ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(*slaveIterator, 0); + EXPECT_EQ(**slaveIterator, 0); ++masterIterator; ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(*masterIterator, 1); + EXPECT_EQ(**masterIterator, 1); ++masterIterator; ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(*masterIterator, 2); + EXPECT_EQ(**masterIterator, 2); - EXPECT_THROW(*slaveIterator, ad_utility::IteratorExpired); + EXPECT_THROW(**slaveIterator, ad_utility::IteratorExpired); ++masterIterator; EXPECT_EQ(masterIterator, generator.end()); } // _____________________________________________________________________________ -TEST(CacheableGenerator, testForEachCachedValueIteratesCorrectly) { +TEST(CacheableGenerator, verifyResetDoesRemoveAndReturnListener) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged([&](bool canShrink) { - ++callCounter; - return canShrink && callCounter > 2; - }); + generator.setOnSizeChanged( + [&](bool canShrink, bool, std::shared_ptr) { + ++callCounter; + return canShrink; + }); + + auto function = generator.resetOnSizeChanged(); + ASSERT_TRUE(function); auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(iterator, generator.end()); + EXPECT_EQ(callCounter, 0); - 
uint32_t timesCalledFirst = 0; - generator.forEachCachedValue([&](int value) { - EXPECT_EQ(timesCalledFirst, value); - ++timesCalledFirst; - }); - EXPECT_EQ(timesCalledFirst, 1); + auto result = function(false, false, {}); + EXPECT_EQ(callCounter, 1); + EXPECT_FALSE(result); - ++iterator; + result = function(true, false, {}); EXPECT_EQ(callCounter, 2); - ASSERT_NE(iterator, generator.end()); - - uint32_t timesCalledSecond = 0; - generator.forEachCachedValue([&](int value) { - EXPECT_EQ(timesCalledSecond, value); - ++timesCalledSecond; - }); - EXPECT_EQ(timesCalledSecond, 2); - - ++iterator; - EXPECT_EQ(callCounter, 5); - ASSERT_NE(iterator, generator.end()); - - uint32_t timesCalledThird = 0; - generator.forEachCachedValue([&](int value) { - EXPECT_EQ(timesCalledThird + 2, value); - ++timesCalledThird; - }); - EXPECT_EQ(timesCalledThird, 1); - - ++iterator; - EXPECT_EQ(callCounter, 5); - EXPECT_EQ(iterator, generator.end()); - - uint32_t timesCalledFourth = 0; - generator.forEachCachedValue([&](int value) { - EXPECT_EQ(timesCalledFourth + 2, value); - ++timesCalledFourth; - }); - EXPECT_EQ(timesCalledFourth, 1); + EXPECT_TRUE(result); } From 27e451e9e30e03e13f9fe2cde651b6b69323cd51 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:35:13 +0200 Subject: [PATCH 079/133] Clarify currently buggy behaviour --- src/engine/Operation.cpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 8a8d9d282b..33b32630dc 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -152,23 +152,19 @@ CacheValue Operation::runComputationAndTransformToCache( Result Operation::extractFromCache( std::shared_ptr result, bool freshlyInserted, bool isRoot, ComputationMode computationMode) { - if (result->isDataEvaluated()) { - auto resultNumRows = result->idTable().size(); - auto resultNumCols = 
result->idTable().numColumns(); - LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " - << resultNumCols << std::endl; - } - // Keep backwards compatible for operations that don't support this if (!result->isDataEvaluated() && computationMode == ComputationMode::FULLY_MATERIALIZED) { auto& cache = _executionContext->getQueryTreeCache(); auto cacheKey = getCacheKey(); try { - cache.transformValue(getCacheKey(), [](const CacheValue& oldValue) { + cache.transformValue(getCacheKey(), [&result]( + const CacheValue& oldValue) { const auto& oldResult = oldValue.resultTable(); - return CacheValue{CacheableResult{oldResult.aggregateTable().value()}, - oldValue.runtimeInfo()}; + CacheValue value{CacheableResult{oldResult.aggregateTable().value()}, + oldValue.runtimeInfo()}; + result = value.resultTablePtr(); + return value; }); } catch (const std::bad_optional_access&) { ad_utility::Timer timer{ad_utility::Timer::Started}; @@ -180,10 +176,25 @@ Result Operation::extractFromCache( return value; }); } + // TODO In rare cases this assertion might be violated when + // the cache is cleared while we are transforming. The solution would be to + // replace transformValue with a solution that is part of computeOnce that + // calls some sort of "cache entry needs recomputing check" and replaces the + // value while blocking the individual entry, but not the whole cache. 
+ AD_CORRECTNESS_CHECK(result->isDataEvaluated()); + } + + if (result->isDataEvaluated()) { + auto resultNumRows = result->idTable().size(); + auto resultNumCols = result->idTable().numColumns(); + LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " + << resultNumCols << std::endl; } + if (result->isDataEvaluated()) { return Result::createResultWithFullyEvaluatedIdTable(std::move(result)); } + if (freshlyInserted) { return Result::createResultAsMasterConsumer( std::move(result), From 96982aa8de4fdc0a23bd003952dead5847b218e8 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 11 Jul 2024 23:22:34 +0200 Subject: [PATCH 080/133] Fix macOS build --- src/util/CacheableGenerator.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 5b8d1639bf..9994063a1d 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -152,7 +152,11 @@ class CacheableGenerator { if (currentOwningThread != std::this_thread::get_id()) { lock.lock(); } - return std::move(onSizeChanged_); + auto result = std::move(onSizeChanged_); + // Explicitly empty function because not all standard libraries do that on + // move. 
+ onSizeChanged_ = {}; + return result; } void setOnNextChunkComputed(std::function From 9c07e4f74e1dd166495a5d8387b789028dca029b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:18:25 +0200 Subject: [PATCH 081/133] Fix wrong merge conflict resolution --- test/CMakeLists.txt | 22 +++++++++++----------- test/ExportQueryExecutionTreesTest.cpp | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3d0c2e9ac4..2249d90db9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,13 +6,13 @@ add_subdirectory(util) # Link binary ${basename} against `gmock_main`, the threading library, the # general test utilities and all libraries that are specified as additional # arguments. -function (linkTest basename) +function(linkTest basename) qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the executable ${basename} that is compiled from the source file # "${basename}".cpp -function (addTest basename) +function(addTest basename) add_executable(${basename} "${basename}.cpp") endfunction() @@ -43,23 +43,23 @@ if (SINGLE_TEST_BINARY) qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL TRUE) -else() +else () message(STATUS "The tests are split over multiple binaries") -endif() +endif () # Usage: `addAndLinkTest(basename, [additionalLibraries...]` # Add a GTest/GMock test case that is called `basename` and compiled from a file called # `basename.cpp`. All tests are linked against `gmock_main` and the threading library. 
# additional libraries against which the test case has to be linked can be specified as # additional arguments after the `basename` function(addLinkAndDiscoverTest basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else () - addTest(${basename}) - linkAndDiscoverTest(${basename} ${ARGN}) - endif () + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else () + addTest(${basename}) + linkAndDiscoverTest(${basename} ${ARGN}) + endif () endfunction() diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 09e0587b9c..502c4fbed6 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -681,7 +681,7 @@ testIriKg runConstructQueryTestCase(testCaseConstruct); } -TEST(ExportQueryExecutionTree, TestWithIriExtendedEscaped) { +TEST(ExportQueryExecutionTrees, TestWithIriExtendedEscaped) { std::string kg = "

" " Date: Wed, 24 Jul 2024 21:02:29 +0200 Subject: [PATCH 082/133] Use `#pragma once` --- src/util/CacheableGenerator.h | 5 +---- src/util/IteratorWrapper.h | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 9994063a1d..339fab4cd3 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -2,8 +2,7 @@ // Chair of Algorithms and Data Structures. // Author: Robin Textor-Falconi -#ifndef CACHEABLEGENERATOR_H -#define CACHEABLEGENERATOR_H +#pragma once #include @@ -271,5 +270,3 @@ class CacheableGenerator { } }; }; // namespace ad_utility - -#endif // CACHEABLEGENERATOR_H diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h index e6073697f7..6c6046f608 100644 --- a/src/util/IteratorWrapper.h +++ b/src/util/IteratorWrapper.h @@ -2,8 +2,7 @@ // Chair of Algorithms and Data Structures. // Author: Robin Textor-Falconi -#ifndef ITERATORWRAPPER_H -#define ITERATORWRAPPER_H +#pragma once #include @@ -27,5 +26,3 @@ class IteratorWrapper { }; }; // namespace ad_utility - -#endif // ITERATORWRAPPER_H From c6aa641933b4250aa8a49383f8fbfc72fba452d9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:17:03 +0200 Subject: [PATCH 083/133] Simplify caching structure to fix bugs --- src/engine/Operation.cpp | 76 +++----- src/engine/QueryExecutionContext.h | 28 +-- src/engine/Result.cpp | 36 +--- src/engine/Result.h | 13 +- src/util/Cache.h | 84 ++------- src/util/CacheableGenerator.h | 89 ++++----- src/util/ConcurrentCache.h | 39 ++-- test/CacheTest.cpp | 290 ++--------------------------- test/CacheableGeneratorTest.cpp | 122 +----------- 9 files changed, 146 insertions(+), 631 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 33b32630dc..8a07982854 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -131,19 +131,14 @@ CacheValue 
Operation::runComputationAndTransformToCache( auto& cache = _executionContext->getQueryTreeCache(); CacheableResult result{runComputation(timer, computationMode)}; if (!result.isDataEvaluated()) { - result.setOnSizeChanged([&cache, cacheKey](bool isShrinkable, bool, - std::shared_ptr) { - // TODO find out how to handle pinned entries properly. - auto sizeChange = cache.recomputeSize(cacheKey, !isShrinkable); - if (sizeChange == ad_utility::ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE) { - return isShrinkable; - } - return false; - }); - result.setOnNextChunkComputed([runtimeInfo = getRuntimeInfoPointer()]( - std::chrono::milliseconds duration) { - runtimeInfo->totalTime_ += duration; - }); + result.setOnSizeChanged( + [&cache, cacheKey, runtimeInfo = getRuntimeInfoPointer()]( + std::optional duration) { + cache.recomputeSize(cacheKey); + if (duration.has_value()) { + runtimeInfo->totalTime_ += duration.value(); + } + }); } return CacheValue{std::move(result), runtimeInfo()}; } @@ -152,38 +147,6 @@ CacheValue Operation::runComputationAndTransformToCache( Result Operation::extractFromCache( std::shared_ptr result, bool freshlyInserted, bool isRoot, ComputationMode computationMode) { - // Keep backwards compatible for operations that don't support this - if (!result->isDataEvaluated() && - computationMode == ComputationMode::FULLY_MATERIALIZED) { - auto& cache = _executionContext->getQueryTreeCache(); - auto cacheKey = getCacheKey(); - try { - cache.transformValue(getCacheKey(), [&result]( - const CacheValue& oldValue) { - const auto& oldResult = oldValue.resultTable(); - CacheValue value{CacheableResult{oldResult.aggregateTable().value()}, - oldValue.runtimeInfo()}; - result = value.resultTablePtr(); - return value; - }); - } catch (const std::bad_optional_access&) { - ad_utility::Timer timer{ad_utility::Timer::Started}; - CacheableResult newResult{runComputation(timer, computationMode)}; - cache.transformValue( - cacheKey, [this, &newResult, &result](const CacheValue&) { 
- CacheValue value{std::move(newResult), runtimeInfo()}; - result = value.resultTablePtr(); - return value; - }); - } - // TODO In rare cases this assertion might be violated when - // the cache is cleared while we are transforming. The solution would be to - // replace transformValue with a solution that is part of computeOnce that - // calls some sort of "cache entry needs recomputing check" and replaces the - // value while blocking the individual entry, but not the whole cache. - AD_CORRECTNESS_CHECK(result->isDataEvaluated()); - } - if (result->isDataEvaluated()) { auto resultNumRows = result->idTable().size(); auto resultNumCols = result->idTable().numColumns(); @@ -275,17 +238,30 @@ std::shared_ptr Operation::getResult( cacheKey); }; - bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; + using ad_utility::CachePolicy; + + CachePolicy cachePolicy = computationMode == ComputationMode::ONLY_IF_CACHED + ? CachePolicy::neverCompute + : CachePolicy::computeOnDemand; auto result = - pinResult - ? cache.computeOncePinned(cacheKey, cacheSetup, onlyReadFromCache) - : cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache); + pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, cachePolicy) + : cache.computeOnce(cacheKey, cacheSetup, cachePolicy); if (result._resultPointer == nullptr) { - AD_CORRECTNESS_CHECK(onlyReadFromCache); + AD_CORRECTNESS_CHECK(cachePolicy == CachePolicy::neverCompute); return nullptr; } + + if (!result._resultPointer->resultTable().isDataEvaluated() && + computationMode == ComputationMode::FULLY_MATERIALIZED) { + AD_CORRECTNESS_CHECK(!actuallyComputed); + result = pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, + CachePolicy::alwaysCompute) + : cache.computeOnce(cacheKey, cacheSetup, + CachePolicy::alwaysCompute); + } + updateRuntimeInformationOnSuccess( result, result._resultPointer->resultTable().isDataEvaluated() ? 
timer.msecs() diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 0bb51a7a48..5e407dfe22 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -24,33 +24,13 @@ class CacheValue { private: std::shared_ptr resultTable_; RuntimeInformation runtimeInfo_; - std::shared_ptr currentSize_ = - std::make_shared(0); public: + // TODO accept ProtoResult to define type of cacheable result explicit CacheValue(CacheableResult resultTable, RuntimeInformation runtimeInfo) : resultTable_{std::make_shared(std::move(resultTable))}, - runtimeInfo_{std::move(runtimeInfo)} { - if (!resultTable_->isDataEvaluated()) { - auto function = resultTable_->resetOnSizeChanged(); - // We assume this value has previously been set, otherwise this can be - // simplified - AD_CONTRACT_CHECK(function); - resultTable_->setOnSizeChanged( - [function = std::move(function), currentSize = currentSize_]( - bool isShrinkable, bool entryAdded, - std::shared_ptr entry) { - ad_utility::MemorySize size = calculateSize(*entry); - if (entryAdded) { - currentSize->fetch_add(size.getBytes()); - } else { - currentSize->fetch_sub(size.getBytes()); - } - return function(isShrinkable, entryAdded, std::move(entry)); - }); - } - } + runtimeInfo_{std::move(runtimeInfo)} {} CacheValue(CacheValue&&) = default; CacheValue(const CacheValue&) = delete; @@ -77,7 +57,6 @@ class CacheValue { // Clear listeners try { resultTable_->setOnSizeChanged({}); - resultTable_->setOnNextChunkComputed({}); } catch (...) { // Should never happen. The listeners only throw assertion errors // if the result is evaluated. 
@@ -93,7 +72,8 @@ class CacheValue { if (tablePtr->isDataEvaluated()) { return calculateSize(tablePtr->idTable()); } - return ad_utility::MemorySize::bytes(cacheValue.currentSize_->load()); + return ad_utility::MemorySize::bytes( + tablePtr->idTables().getCurrentSize()); } else { return 0_B; } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index c70864a988..1a2d5e9aed 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -223,46 +223,20 @@ CacheableResult::CacheableResult(ProtoResult protoResult) // _____________________________________________________________________________ void CacheableResult::setOnSizeChanged( - std::function)> + std::function duration)> onSizeChanged) { storage_.idTables().setOnSizeChanged(std::move(onSizeChanged)); } // _____________________________________________________________________________ -std::function)> -CacheableResult::resetOnSizeChanged() { - return storage_.idTables().resetOnSizeChanged(); -} - -// _____________________________________________________________________________ -void CacheableResult::setOnNextChunkComputed( - std::function onNextChunkComputed) { - storage_.idTables().setOnNextChunkComputed(std::move(onNextChunkComputed)); -} +const IdTable& CacheableResult::idTable() const { return storage_.idTable(); } // _____________________________________________________________________________ -std::optional CacheableResult::aggregateTable() const { - try { - std::optional clone; - for (const std::shared_ptr& table : storage_.idTables()) { - if (clone.has_value()) { - clone->insertAtEnd(table->begin(), table->end()); - } else { - clone.emplace(table->clone()); - } - } - AD_CORRECTNESS_CHECK(clone.has_value()); - return ProtoResult{ - std::move(clone).value(), storage_.sortedBy_, - ProtoResult::SharedLocalVocabWrapper{storage_.localVocab_}}; - } catch (const ad_utility::IteratorExpired&) { - return std::nullopt; - } +const ad_utility::CacheableGenerator& CacheableResult::idTables() + const { + return 
storage_.idTables(); } -// _____________________________________________________________________________ -const IdTable& CacheableResult::idTable() const { return storage_.idTable(); } - // _____________________________________________________________________________ bool CacheableResult::isDataEvaluated() const noexcept { return storage_.isDataEvaluated(); diff --git a/src/engine/Result.h b/src/engine/Result.h index 3e6f3c7dba..829a8864ca 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -172,6 +172,7 @@ class ProtoResult { class CacheableResult { friend class Result; + // TODO Add custom size counter and set max size using StorageType = ResultStorage>; StorageType storage_; @@ -186,19 +187,13 @@ class CacheableResult { explicit CacheableResult(ProtoResult protoResult); void setOnSizeChanged( - std::function)> + std::function)> onSizeChanged); - std::function)> - resetOnSizeChanged(); - - void setOnNextChunkComputed( - std::function onNextChunkComputed); - - std::optional aggregateTable() const; - const IdTable& idTable() const; + const ad_utility::CacheableGenerator& idTables() const; + bool isDataEvaluated() const noexcept; }; diff --git a/src/util/Cache.h b/src/util/Cache.h index 0d6c0cb0a9..d7d74c1e86 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -28,12 +28,6 @@ using namespace ad_utility::memory_literals; static constexpr auto size_t_max = std::numeric_limits::max(); -enum class ResizeResult { - FITS_IN_CACHE, - EXCEEDS_SINGLE_ENTRY_SIZE, - EXCEEDS_MAX_SIZE -}; - /* @brief Associative array for almost arbitrary keys and values that acts as a cache with fixed memory capacity. 
@@ -232,67 +226,30 @@ class FlexibleCache { // TODO:: implement this functionality } - ResizeResult recomputeSize(const Key& key, bool removeIfEntryGrewTooBig) { - ResizeResult result = ResizeResult::FITS_IN_CACHE; - auto applySizeDifference = [this, &key, &result, removeIfEntryGrewTooBig]( - MemorySize& variable, bool pinned) { - auto newSize = _valueSizeGetter(*(*this)[key]); - auto& oldSize = _sizeMap.at(key); - bool needsShrinking = true; - MemorySize pinnedOffset = pinned ? 0_B : _totalSizePinned; - if (_maxSizeSingleEntry < newSize) { - result = ResizeResult::EXCEEDS_SINGLE_ENTRY_SIZE; - if (removeIfEntryGrewTooBig && !pinned) { - erase(key); - return; - } - // We don't know how to shrink the size here, so if - // `removeIfEntryGrewTooBig` is false, this needs to be handled by the - // caller. - needsShrinking = false; - } else if (_maxSize - std::min(pinnedOffset, _maxSize) < newSize) { - result = ResizeResult::EXCEEDS_MAX_SIZE; - // We can't fit it in the cache, so remove if not pinned - if (!pinned) { - erase(key); - return; - } - } - - if (newSize >= oldSize) { - variable += newSize - oldSize; - } else { - variable -= oldSize - newSize; - } - oldSize = newSize; - if (needsShrinking && _totalSizePinned <= _maxSize) { - makeRoomIfFits(0_B); - } - }; - if (containsPinned(key)) { - applySizeDifference(_totalSizePinned, true); - } else if (containsNonPinned(key)) { - applySizeDifference(_totalSizeNonPinned, false); + void recomputeSize(const Key& key) { + // Pinned entries must not be dynamic in nature + AD_CONTRACT_CHECK(!containsPinned(key)); + if (!containsNonPinned(key)) { + return; } - return result; - } - - void transformValue( - const Key& key, - const InvocableWithExactReturnType auto& - transformer) { - bool pinned = false; - if (containsPinned(key)) { - pinned = true; - } else if (!containsNonPinned(key)) { + auto newSize = _valueSizeGetter(*(*this)[key]); + auto& oldSize = _sizeMap.at(key); + // Entry has grown too big to completely keep within the 
cache or we can't + // fit it in the cache + if (_maxSizeSingleEntry < newSize || + _maxSize - std::min(_totalSizePinned, _maxSize) < newSize) { + erase(key); return; } - auto transformedValue = transformer(*(*this)[key]); - erase(key); - if (pinned) { - insertPinned(key, std::move(transformedValue)); + + if (newSize >= oldSize) { + _totalSizeNonPinned += newSize - oldSize; } else { - insert(key, std::move(transformedValue)); + _totalSizeNonPinned -= oldSize - newSize; + } + oldSize = newSize; + if (_totalSizePinned <= _maxSize) { + makeRoomIfFits(0_B); } } @@ -481,7 +438,6 @@ class FlexibleCache { FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned); FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed); - FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned); FRIEND_TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize); FRIEND_TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement); diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 339fab4cd3..648b098698 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -23,6 +23,13 @@ namespace ad_utility { class IteratorExpired : public std::exception {}; template +struct DefaultSizeCounter { + uint64_t operator()(const std::remove_reference_t&) const { return 1; } +}; + +template &> + SizeCounter = DefaultSizeCounter> class CacheableGenerator { using GenIterator = typename cppcoro::generator::iterator; @@ -37,11 +44,11 @@ class CacheableGenerator { std::vector> cachedValues_{}; MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; std::atomic currentOwningThread{}; - // Returns true if cache needs to shrink, accepts a parameter that tells the - // callback if we actually can shrink, if the size changed because of a - // newly generated entry and the entry about to be removed or newly added. 
- std::function)> onSizeChanged_{}; - std::function onNextChunkComputed_{}; + SizeCounter sizeCounter_{}; + std::atomic currentSize_ = 0; + uint64_t maxSize_ = std::numeric_limits::max(); + std::function)> + onSizeChanged_{}; public: explicit ComputationStorage(cppcoro::generator generator) @@ -92,18 +99,16 @@ class CacheableGenerator { generatorIterator_ = generator_.begin(); } auto stop = std::chrono::steady_clock::now(); - if (onNextChunkComputed_) { - onNextChunkComputed_( - std::chrono::duration_cast(stop - - start)); - } if (generatorIterator_.value() != generator_.end()) { auto pointer = std::make_shared(std::move(*generatorIterator_.value())); - cachedValues_.push_back(pointer); - if (onSizeChanged_ && onSizeChanged_(true, true, std::move(pointer))) { - tryShrinkCache(); + currentSize_.fetch_add(sizeCounter_(*pointer)); + cachedValues_.push_back(std::move(pointer)); + if (onSizeChanged_) { + onSizeChanged_(std::chrono::duration_cast( + stop - start)); } + tryShrinkCacheIfNeccessary(); } if (isMaster) { conditionVariable_.notify_all(); @@ -136,7 +141,7 @@ class CacheableGenerator { } void setOnSizeChanged( - std::function)> + std::function)> onSizeChanged) noexcept { std::unique_lock lock{mutex_, std::defer_lock}; if (currentOwningThread != std::this_thread::get_id()) { @@ -145,46 +150,32 @@ class CacheableGenerator { onSizeChanged_ = std::move(onSizeChanged); } - std::function)> - resetOnSizeChanged() noexcept { - std::unique_lock lock{mutex_, std::defer_lock}; - if (currentOwningThread != std::this_thread::get_id()) { - lock.lock(); - } - auto result = std::move(onSizeChanged_); - // Explicitly empty function because not all standard libraries do that on - // move. 
- onSizeChanged_ = {}; - return result; - } - - void setOnNextChunkComputed(std::function - onNextChunkComputed) noexcept { - std::unique_lock lock{mutex_, std::defer_lock}; - if (currentOwningThread != std::this_thread::get_id()) { - lock.lock(); + void tryShrinkCacheIfNeccessary() { + if (currentSize_ <= maxSize_) { + return; } - onNextChunkComputed_ = std::move(onNextChunkComputed); - } - - void tryShrinkCache() { size_t maxBound = cachedValues_.size() - 1; for (size_t i = 0; i < maxBound; i++) { auto& pointer = cachedValues_.at(i); if (pointer) { - auto movedPointer = std::move(pointer); + currentSize_.fetch_add(sizeCounter_(*pointer)); + pointer.reset(); if (onSizeChanged_) { - bool isShrinkable = i < maxBound - 1; - if (onSizeChanged_(isShrinkable, false, std::move(movedPointer))) { - AD_CONTRACT_CHECK(isShrinkable); - } else { - break; - } + onSizeChanged_(std::nullopt); + } + if (currentSize_ <= maxSize_ || i >= maxBound - 1) { + break; } } } } + + void setMaxSize(uint64_t maxSize) { + std::unique_lock lock{mutex_}; + maxSize_ = maxSize; + } }; + std::shared_ptr computationStorage_; public: @@ -254,19 +245,17 @@ class CacheableGenerator { IteratorSentinel end() const noexcept { return IteratorSentinel{}; } void setOnSizeChanged( - std::function)> + std::function)> onSizeChanged) noexcept { computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); } - std::function)> - resetOnSizeChanged() { - return computationStorage_->resetOnSizeChanged(); + uint64_t getCurrentSize() const { + return computationStorage_->currentSize_.load(); } - void setOnNextChunkComputed(std::function - onNextChunkComputed) noexcept { - computationStorage_->setOnNextChunkComputed(std::move(onNextChunkComputed)); + void setMaxSize(uint64_t maxSize) { + computationStorage_->setMaxSize(maxSize); } }; }; // namespace ad_utility diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 8a930d3dc9..4e21be7142 100644 --- a/src/util/ConcurrentCache.h +++ 
b/src/util/ConcurrentCache.h @@ -43,6 +43,12 @@ enum struct CacheStatus { notInCacheAndNotComputed }; +enum class CachePolicy { + neverCompute, + computeOnDemand, + alwaysCompute, +}; + // Convert a `CacheStatus` to a human-readable string. We mostly use it for // JSON exports, so we use a hyphenated format. constexpr std::string_view toString(CacheStatus status) { @@ -184,8 +190,8 @@ class ConcurrentCache { ResultAndCacheStatus computeOnce( const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - bool onlyReadFromCache = false) { - return computeOnceImpl(false, key, computeFunction, onlyReadFromCache); + CachePolicy cachePolicy = CachePolicy::computeOnDemand) { + return computeOnceImpl(false, key, computeFunction, cachePolicy); } /// Similar to computeOnce, with the following addition: After the call @@ -193,21 +199,12 @@ class ConcurrentCache { ResultAndCacheStatus computeOncePinned( const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - bool onlyReadFromCache = false) { - return computeOnceImpl(true, key, computeFunction, onlyReadFromCache); + CachePolicy cachePolicy = CachePolicy::computeOnDemand) { + return computeOnceImpl(true, key, computeFunction, cachePolicy); } - auto recomputeSize(const Key& key, bool removeIfEntryGrewTooBig) { - return _cacheAndInProgressMap.wlock()->_cache.recomputeSize( - key, removeIfEntryGrewTooBig); - } - - void transformValue( - const Key& key, - const InvocableWithExactReturnType auto& - transformer) { - return _cacheAndInProgressMap.wlock()->_cache.transformValue(key, - transformer); + void recomputeSize(const Key& key) { + _cacheAndInProgressMap.wlock()->_cache.recomputeSize(key); } /// Clear the cache (but not the pinned entries) @@ -320,11 +317,14 @@ class ConcurrentCache { } private: - // implementation for computeOnce (pinned and normal variant). 
+ // TODO accept computeFunction with or without old cached value in + // case cached value needs recomputation due to some condition like pinned/non + // lazy requirement implementation for computeOnce (pinned and normal + // variant). ResultAndCacheStatus computeOnceImpl( bool pinned, const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - bool onlyReadFromCache) { + CachePolicy cachePolicy) { using std::make_shared; bool mustCompute; shared_ptr resultInProgress; @@ -341,9 +341,10 @@ class ConcurrentCache { if (contained) { // the result is in the cache, simply return it. return {cache[key], cacheStatus}; - } else if (onlyReadFromCache) { + } else if (cachePolicy == CachePolicy::neverCompute) { return {nullptr, CacheStatus::notInCacheAndNotComputed}; - } else if (lockPtr->_inProgress.contains(key)) { + } else if (lockPtr->_inProgress.contains(key) && + cachePolicy == CachePolicy::computeOnDemand) { // the result is not cached, but someone else is computing it. // it is important, that we do not immediately call getResult() since // this call blocks and we currently hold a lock. 
diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index 7ac77d6971..fbdab252e5 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -12,7 +12,6 @@ using std::string; using namespace ad_utility::memory_literals; -using enum ad_utility::ResizeResult; using Vec = std::vector; [[maybe_unused]] auto vectorSizeGetter = [](const auto& pointer) { @@ -145,101 +144,6 @@ TEST(LRUCacheTest, testDecreasingCapacity) { ASSERT_FALSE(cache["4"]); } -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyTransformValueWorksForNonPinnedValues) { - LRUCache> cache{2, 3_B}; - cache.insert("1", "x"); - - ASSERT_EQ(cache.nonPinnedSize(), 1_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - - cache.transformValue("1", - [](const std::string& value) { return value + "a"; }); - - ASSERT_EQ(cache.nonPinnedSize(), 2_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - ASSERT_TRUE(cache.contains("1")); - ASSERT_EQ(*cache["1"], "xa"); - - cache.insert("2", "y"); - - ASSERT_EQ(cache.nonPinnedSize(), 3_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - - cache.transformValue("1", - [](const std::string& value) { return value + "b"; }); - - ASSERT_EQ(cache.nonPinnedSize(), 3_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - - ASSERT_TRUE(cache.contains("1")); - ASSERT_FALSE(cache.contains("2")); - ASSERT_EQ(*cache["1"], "xab"); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyTransformValueWorksForPinnedValues) { - LRUCache> cache{1}; - cache.insertPinned("1", "x"); - - ASSERT_EQ(cache.nonPinnedSize(), 0_B); - ASSERT_EQ(cache.pinnedSize(), 1_B); - - cache.transformValue("1", - [](const std::string& value) { return value + "a"; }); - - ASSERT_EQ(cache.nonPinnedSize(), 0_B); - ASSERT_EQ(cache.pinnedSize(), 2_B); - ASSERT_TRUE(cache.contains("1")); - ASSERT_EQ(*cache["1"], "xa"); - - cache.insert("2", "y"); - - ASSERT_EQ(cache.nonPinnedSize(), 1_B); - ASSERT_EQ(cache.pinnedSize(), 2_B); - 
ASSERT_TRUE(cache.contains("1")); - ASSERT_TRUE(cache.contains("2")); - - cache.transformValue("1", - [](const std::string& value) { return value + "b"; }); - - ASSERT_EQ(cache.nonPinnedSize(), 0_B); - ASSERT_EQ(cache.pinnedSize(), 3_B); - ASSERT_TRUE(cache.contains("1")); - ASSERT_FALSE(cache.contains("2")); - - ASSERT_EQ(*cache["1"], "xab"); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyTransformValueWorksIsNoOpForNonExistantValues) { - LRUCache> cache{1}; - - ASSERT_EQ(cache.nonPinnedSize(), 0_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - - cache.transformValue("1", - [](const std::string&) { return std::string{"a"}; }); - - ASSERT_EQ(cache.nonPinnedSize(), 0_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - ASSERT_FALSE(cache.contains("1")); - - cache.insert("2", "y"); - - ASSERT_EQ(cache.nonPinnedSize(), 1_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - ASSERT_TRUE(cache.contains("2")); - - cache.transformValue("1", - [](const std::string&) { return std::string{"a"}; }); - - ASSERT_EQ(cache.nonPinnedSize(), 1_B); - ASSERT_EQ(cache.pinnedSize(), 0_B); - ASSERT_FALSE(cache.contains("1")); - ASSERT_TRUE(cache.contains("2")); -} - // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased) { LRUCache>, decltype(vectorSizeGetter)> @@ -312,9 +216,9 @@ TEST(LRUCacheTest, // _____________________________________________________________________________ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { LRUCache>, decltype(vectorSizeGetter)> - cache{3, 8_B, 4_B}; + cache{3, 12_B, 8_B}; - auto vecA = std::make_shared(); + auto vecA = std::make_shared(0); auto vecB = std::make_shared(1); cache.insert(0, vecA); @@ -328,7 +232,7 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { // Cache does was not notified about the size change ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - ASSERT_EQ(FITS_IN_CACHE, 
cache.recomputeSize(0, false)); + cache.recomputeSize(0); ASSERT_EQ(cache._totalSizeNonPinned, 8_B); ASSERT_TRUE(cache.contains(0)); @@ -336,142 +240,24 @@ TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { vecA->resize(2); - ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); + cache.recomputeSize(0); ASSERT_EQ(cache._totalSizeNonPinned, 12_B); ASSERT_TRUE(cache.contains(0)); ASSERT_TRUE(cache.contains(1)); - vecA->resize(1); - - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); - - ASSERT_EQ(cache._totalSizeNonPinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - auto vecC = std::make_shared(0); - cache.insert(2, vecC); - vecB->resize(1); - - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(1, false)); - - ASSERT_EQ(cache._totalSizeNonPinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - ASSERT_TRUE(cache.contains(2)); - - // Set to high value to avoid getting limited by this. - cache.setMaxSizeSingleEntry(64_B); - vecC->resize(3); - ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, false)); + cache.recomputeSize(1); ASSERT_EQ(cache._totalSizeNonPinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - ASSERT_FALSE(cache.contains(2)); - - cache.setMaxSizeSingleEntry(4_B); - vecA->resize(2); - - ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); - - ASSERT_EQ(cache._totalSizeNonPinned, 4_B); ASSERT_FALSE(cache.contains(0)); ASSERT_TRUE(cache.contains(1)); - ASSERT_FALSE(cache.contains(2)); - vecB->clear(); - cache.erase(1); + vecB->resize(3); + cache.recomputeSize(1); ASSERT_EQ(cache._totalSizeNonPinned, 0_B); ASSERT_FALSE(cache.contains(0)); ASSERT_FALSE(cache.contains(1)); - ASSERT_FALSE(cache.contains(2)); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputedPinned) { - LRUCache>, decltype(vectorSizeGetter)> - cache{3, 8_B, 4_B}; - - auto 
vecA = std::make_shared(); - auto vecB = std::make_shared(1); - - cache.insertPinned(0, vecA); - cache.insertPinned(1, vecB); - - ASSERT_EQ(cache._totalSizePinned, 4_B); - - vecA->resize(1); - vecB->resize(2); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizePinned, 4_B); - - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); - - ASSERT_EQ(cache._totalSizePinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - vecA->resize(2); - - ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, false)); - ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); - - ASSERT_EQ(cache._totalSizePinned, 12_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - vecA->resize(1); - - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(0, false)); - - ASSERT_EQ(cache._totalSizePinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - auto vecC = std::make_shared(0); - cache.insertPinned(2, vecC); - vecB->resize(1); - - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(1, false)); - - ASSERT_EQ(cache._totalSizePinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - ASSERT_TRUE(cache.contains(2)); - - // Set to high value to avoid getting limited by this. 
- cache.setMaxSizeSingleEntry(64_B); - vecC->resize(3); - ASSERT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, true)); - - ASSERT_EQ(cache._totalSizePinned, 20_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - ASSERT_TRUE(cache.contains(2)); - cache.erase(2); - - cache.setMaxSizeSingleEntry(4_B); - vecA->resize(2); - - ASSERT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(0, true)); - - ASSERT_EQ(cache._totalSizePinned, 12_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - ASSERT_FALSE(cache.contains(2)); - cache.erase(0); - - vecB->clear(); - cache.erase(1); - - ASSERT_EQ(cache._totalSizePinned, 0_B); - ASSERT_FALSE(cache.contains(0)); - ASSERT_FALSE(cache.contains(1)); - ASSERT_FALSE(cache.contains(2)); } // _____________________________________________________________________________ @@ -489,7 +275,7 @@ TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize) { vecC->resize(1); - ASSERT_EQ(FITS_IN_CACHE, cache.recomputeSize(2, true)); + cache.recomputeSize(2); ASSERT_TRUE(cache.contains(0)); ASSERT_FALSE(cache.contains(1)); ASSERT_TRUE(cache.contains(2)); @@ -500,12 +286,7 @@ TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement) { LRUCache> cache{1}; cache.insert("1", "a"); - EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize("2", false)); - - EXPECT_TRUE(cache.contains("1")); - EXPECT_FALSE(cache.contains("2")); - - EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize("2", true)); + cache.recomputeSize("2"); EXPECT_TRUE(cache.contains("1")); EXPECT_FALSE(cache.contains("2")); @@ -513,65 +294,32 @@ TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement) { TEST(LRUCacheTest, verifyRecomputeDoesNoticeExceedingSizeOnShrink) { LRUCache>, decltype(vectorSizeGetter)> - cache{3, 12_B, 8_B}; + cache{3, 32_B, 16_B}; auto vecA = std::make_shared(2); auto vecB = std::make_shared(1); - auto vecC = std::make_shared(0); + auto vecC = std::make_shared(4); cache.insert(0, vecA); cache.insert(1, vecB); 
cache.insert(2, vecC); - vecC->resize(4); - - EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); - - EXPECT_TRUE(cache.contains(0)); - EXPECT_TRUE(cache.contains(1)); - EXPECT_TRUE(cache.contains(2)); - + cache.setMaxSizeSingleEntry(8_B); vecC->resize(3); - - EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); + cache.recomputeSize(2); EXPECT_TRUE(cache.contains(0)); EXPECT_TRUE(cache.contains(1)); - EXPECT_TRUE(cache.contains(2)); - - vecC->resize(2); - - EXPECT_EQ(FITS_IN_CACHE, cache.recomputeSize(2, false)); - - EXPECT_FALSE(cache.contains(0)); - EXPECT_TRUE(cache.contains(1)); - EXPECT_TRUE(cache.contains(2)); - - vecC->resize(5); - EXPECT_EQ(EXCEEDS_SINGLE_ENTRY_SIZE, cache.recomputeSize(2, false)); - - vecC->resize(4); - cache.setMaxSizeSingleEntry(16_B); - - EXPECT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(2, false)); - - EXPECT_FALSE(cache.contains(0)); - EXPECT_TRUE(cache.contains(1)); EXPECT_FALSE(cache.contains(2)); } -TEST(LRUCacheTest, verifyRecomputeDoesConsiderPinnedSizeForMaxSize) { - LRUCache>, decltype(vectorSizeGetter)> - cache{3, 8_B, 8_B}; - - auto vecA = std::make_shared(2); - auto vecB = std::make_shared(0); - - cache.insertPinned(0, vecA); - cache.insert(1, vecB); +// _____________________________________________________________________________ +TEST(LRUCacheTest, verifyRecomputeDoesErrorOutWhenPinned) { + LRUCache cache{3, 12_B, + 8_B}; - vecB->resize(1); + cache.insertPinned(0, 0); - EXPECT_EQ(EXCEEDS_MAX_SIZE, cache.recomputeSize(1, false)); + EXPECT_THROW(cache.recomputeSize(0), ad_utility::Exception); } } // namespace ad_utility diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index feff2aa662..4ad86d1b2a 100644 --- a/test/CacheableGeneratorTest.cpp +++ b/test/CacheableGeneratorTest.cpp @@ -205,7 +205,7 @@ TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { } // _____________________________________________________________________________ 
-TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { +TEST(CacheableGenerator, verifyOnSizeChangedIsCalledWithCorrectTimingInfo) { auto timedGenerator = []() -> generator { while (true) { #ifndef _QLEVER_NO_TIMING_TESTS @@ -219,7 +219,7 @@ TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { CacheableGenerator generator{std::move(timedGenerator)}; - generator.setOnNextChunkComputed([&](auto duration) { + generator.setOnSizeChanged([&](auto duration) { #ifndef _QLEVER_NO_TIMING_TESTS using ::testing::AllOf; using ::testing::Le; @@ -304,11 +304,7 @@ TEST(CacheableGenerator, verifyOnNextChunkComputedIsCalled) { TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged( - [&](bool canShrink, bool, std::shared_ptr) { - ++callCounter; - return canShrink && callCounter > 2; - }); + generator.setOnSizeChanged([&](auto) { ++callCounter; }); auto iterator = generator.begin(true); EXPECT_EQ(callCounter, 1); @@ -323,6 +319,8 @@ TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { EXPECT_EQ(callCounter, 2); ASSERT_NE(iterator, generator.end()); + generator.setMaxSize(1); + ++slaveIterator1; EXPECT_EQ(callCounter, 2); ASSERT_NE(slaveIterator1, generator.end()); @@ -349,48 +347,11 @@ TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { EXPECT_THROW(++slaveIterator2, ad_utility::IteratorExpired); } -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyOnSizeChangedIsCalledWithCorrectParameters) { - CacheableGenerator generator{testGenerator(3)}; - uint32_t callCounter = 0; - generator.setOnSizeChanged([&](bool canShrink, bool wasAdded, - std::shared_ptr pointer) { - switch (callCounter) { - case 0: - case 1: - EXPECT_TRUE(wasAdded); - EXPECT_EQ(*pointer, callCounter); - break; - case 2: - EXPECT_FALSE(wasAdded); - EXPECT_EQ(*pointer, 0); - break; - 
default: - ADD_FAILURE() << "Invalid call count: " << callCounter; - break; - } - ++callCounter; - return canShrink && callCounter > 1; - }); - - auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(iterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 3); - ASSERT_NE(iterator, generator.end()); -} - // _____________________________________________________________________________ TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { CacheableGenerator generator{testGenerator(3)}; uint32_t callCounter = 0; - generator.setOnSizeChanged( - [&](bool canShrink, bool, std::shared_ptr) { - ++callCounter; - return canShrink && callCounter > 2; - }); + generator.setOnSizeChanged([&](auto) { ++callCounter; }); auto iterator = generator.begin(true); EXPECT_EQ(callCounter, 1); @@ -404,6 +365,8 @@ TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { EXPECT_EQ(callCounter, 2); ASSERT_NE(iterator, generator.end()); + generator.setMaxSize(0); + ++slaveIterator; EXPECT_EQ(callCounter, 2); ASSERT_NE(slaveIterator, generator.end()); @@ -422,52 +385,10 @@ TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { EXPECT_EQ(**slaveIterator, 2); } -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyShrinkStopsShrinking) { - CacheableGenerator generator{testGenerator(3)}; - uint32_t callCounter = 0; - generator.setOnSizeChanged( - [&](bool canShrink, bool, std::shared_ptr) { - ++callCounter; - return canShrink && callCounter == 3; - }); - - auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(iterator, generator.end()); - - auto slaveIterator = generator.begin(); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(slaveIterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 2); - ASSERT_NE(iterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 4); - ASSERT_NE(iterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 4); - 
EXPECT_EQ(iterator, generator.end()); - - ++slaveIterator; - ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(**slaveIterator, 1); - - ++slaveIterator; - ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(**slaveIterator, 2); -} - // _____________________________________________________________________________ TEST(CacheableGenerator, verifySlavesCantBlockMasterIterator) { CacheableGenerator generator{testGenerator(3)}; - generator.setOnSizeChanged( - [](bool canShrink, bool, std::shared_ptr) { - return canShrink; - }); + generator.setMaxSize(1); auto masterIterator = generator.begin(true); ASSERT_NE(masterIterator, generator.end()); @@ -490,28 +411,3 @@ TEST(CacheableGenerator, verifySlavesCantBlockMasterIterator) { ++masterIterator; EXPECT_EQ(masterIterator, generator.end()); } - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyResetDoesRemoveAndReturnListener) { - CacheableGenerator generator{testGenerator(3)}; - uint32_t callCounter = 0; - generator.setOnSizeChanged( - [&](bool canShrink, bool, std::shared_ptr) { - ++callCounter; - return canShrink; - }); - - auto function = generator.resetOnSizeChanged(); - ASSERT_TRUE(function); - - auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 0); - - auto result = function(false, false, {}); - EXPECT_EQ(callCounter, 1); - EXPECT_FALSE(result); - - result = function(true, false, {}); - EXPECT_EQ(callCounter, 2); - EXPECT_TRUE(result); -} From d74b89dce6e5316a6c013bb8564bf20c208b797d Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 1 Aug 2024 01:47:32 +0200 Subject: [PATCH 084/133] Fix size calculation --- src/engine/Operation.cpp | 3 ++- src/engine/QueryExecutionContext.h | 12 +----------- src/engine/Result.cpp | 23 ++++++++++++++++++----- src/engine/Result.h | 18 ++++++++++++++---- src/util/Cache.h | 4 ++++ src/util/ConcurrentCache.h | 8 ++++---- test/ExportQueryExecutionTreesTest.cpp | 
21 ++++++++++++++------- test/SparqlDataTypesTest.cpp | 12 ++++++++---- 8 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 8a07982854..a47dd5a8c4 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -129,7 +129,8 @@ CacheValue Operation::runComputationAndTransformToCache( ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey) { auto& cache = _executionContext->getQueryTreeCache(); - CacheableResult result{runComputation(timer, computationMode)}; + CacheableResult result{runComputation(timer, computationMode), + cache.getMaxSizeSingleEntry().getBytes()}; if (!result.isDataEvaluated()) { result.setOnSizeChanged( [&cache, cacheKey, runtimeInfo = getRuntimeInfoPointer()]( diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 5e407dfe22..a8a5255d14 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -26,7 +26,6 @@ class CacheValue { RuntimeInformation runtimeInfo_; public: - // TODO accept ProtoResult to define type of cacheable result explicit CacheValue(CacheableResult resultTable, RuntimeInformation runtimeInfo) : resultTable_{std::make_shared(std::move(resultTable))}, @@ -47,11 +46,6 @@ class CacheValue { return runtimeInfo_; } - static ad_utility::MemorySize calculateSize(const IdTable& idTable) { - return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * - sizeof(Id)); - }; - ~CacheValue() { if (resultTable_ && !resultTable_->isDataEvaluated()) { // Clear listeners @@ -69,11 +63,7 @@ class CacheValue { struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { if (const auto& tablePtr = cacheValue.resultTable_; tablePtr) { - if (tablePtr->isDataEvaluated()) { - return calculateSize(tablePtr->idTable()); - } - return ad_utility::MemorySize::bytes( - tablePtr->idTables().getCurrentSize()); + return 
tablePtr->getCurrentSize(); } else { return 0_B; } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 1a2d5e9aed..fe70f4479a 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -210,16 +210,22 @@ bool ProtoResult::isDataEvaluated() const noexcept { return storage_.isDataEvaluated(); } // _____________________________________________________________________________ -CacheableResult::CacheableResult(ProtoResult protoResult) +CacheableResult::CacheableResult(ProtoResult protoResult, + uint64_t maxElementSize) : storage_{StorageType{ protoResult.isDataEvaluated() ? decltype(StorageType::data_){std::move( protoResult.storage_.idTable())} - : decltype(StorageType::data_){ad_utility::CacheableGenerator{ + : decltype(StorageType::data_){ad_utility::CacheableGenerator< + IdTable, SizeCalculator>{ std::move(protoResult.storage_.idTables())}}, std::move(protoResult.storage_.sortedBy_), std::move(protoResult.storage_.localVocab_), - }} {} + }} { + if (!storage_.isDataEvaluated()) { + storage_.idTables().setMaxSize(maxElementSize); + } +} // _____________________________________________________________________________ void CacheableResult::setOnSizeChanged( @@ -232,8 +238,8 @@ void CacheableResult::setOnSizeChanged( const IdTable& CacheableResult::idTable() const { return storage_.idTable(); } // _____________________________________________________________________________ -const ad_utility::CacheableGenerator& CacheableResult::idTables() - const { +const ad_utility::CacheableGenerator& +CacheableResult::idTables() const { return storage_.idTables(); } @@ -242,6 +248,13 @@ bool CacheableResult::isDataEvaluated() const noexcept { return storage_.isDataEvaluated(); } +// _____________________________________________________________________________ +ad_utility::MemorySize CacheableResult::getCurrentSize() const { + return ad_utility::MemorySize::bytes( + storage_.isDataEvaluated() ? 
SizeCalculator{}(storage_.idTable()) + : storage_.idTables().getCurrentSize()); +} + // _____________________________________________________________________________ Result::Result(std::shared_ptr idTable, std::vector sortedBy, LocalVocabPtr localVocab) diff --git a/src/engine/Result.h b/src/engine/Result.h index 829a8864ca..25173c6375 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -172,9 +172,16 @@ class ProtoResult { class CacheableResult { friend class Result; - // TODO Add custom size counter and set max size + + struct SizeCalculator { + uint64_t operator()(const IdTable& idTable) const { + return idTable.size() * idTable.numColumns() * sizeof(Id); + } + }; + using StorageType = - ResultStorage>; + ResultStorage>; StorageType storage_; public: @@ -184,7 +191,7 @@ class CacheableResult { CacheableResult(CacheableResult&& other) = default; CacheableResult& operator=(CacheableResult&& other) = default; - explicit CacheableResult(ProtoResult protoResult); + CacheableResult(ProtoResult protoResult, uint64_t maxElementSize); void setOnSizeChanged( std::function)> @@ -192,9 +199,12 @@ class CacheableResult { const IdTable& idTable() const; - const ad_utility::CacheableGenerator& idTables() const; + const ad_utility::CacheableGenerator& idTables() + const; bool isDataEvaluated() const noexcept; + + ad_utility::MemorySize getCurrentSize() const; }; // The result of an `Operation`. 
This is the class QLever uses for all diff --git a/src/util/Cache.h b/src/util/Cache.h index d7d74c1e86..5d59069aa9 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -226,6 +226,10 @@ class FlexibleCache { // TODO:: implement this functionality } + MemorySize getMaxSizeSingleEntry() const noexcept { + return _maxSizeSingleEntry; + } + void recomputeSize(const Key& key) { // Pinned entries must not be dynamic in nature AD_CONTRACT_CHECK(!containsPinned(key)); diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 4e21be7142..56ac53fe70 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -277,6 +277,10 @@ class ConcurrentCache { _cacheAndInProgressMap.wlock()->_cache.setMaxSizeSingleEntry(maxSize); } + MemorySize getMaxSizeSingleEntry() const { + return _cacheAndInProgressMap.wlock()->_cache.getMaxSizeSingleEntry(); + } + private: using ResultInProgress = ConcurrentCacheDetail::ResultInProgress; @@ -317,10 +321,6 @@ class ConcurrentCache { } private: - // TODO accept computeFunction with or without old cached value in - // case cached value needs recomputation due to some condition like pinned/non - // lazy requirement implementation for computeOnce (pinned and normal - // variant). 
ResultAndCacheStatus computeOnceImpl( bool pinned, const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 502c4fbed6..70ab494589 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1078,7 +1078,8 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { Result result = Result::createResultWithFullyEvaluatedIdTable( std::make_shared( - ProtoResult{std::move(idTable), {}, LocalVocab{}})); + ProtoResult{std::move(idTable), {}, LocalVocab{}}, + std::numeric_limits::max())); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1110,7 +1111,8 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); @@ -1144,7 +1146,8 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1177,7 +1180,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, 
result); @@ -1214,7 +1218,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1255,7 +1260,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1304,7 +1310,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::createResultAsMasterConsumer( std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}), + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + std::numeric_limits::max()), []() {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index 4787bd66e0..cd634b0cdf 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -17,8 +17,10 @@ namespace { struct ContextWrapper { Index _index{ad_utility::makeUnlimitedAllocator()}; Result _resultTable{Result::createResultWithFullyEvaluatedIdTable( - std::make_shared(ProtoResult{ - IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}))}; + std::make_shared( + ProtoResult{ + IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}, + std::numeric_limits::max()))}; // TODO `VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -29,8 +31,10 @@ struct ContextWrapper { void setIdTable(IdTable&& table) { _resultTable = Result::createResultWithFullyEvaluatedIdTable( - 
std::make_shared(ProtoResult{ - std::move(table), {}, _resultTable.getSharedLocalVocab()})); + std::make_shared( + ProtoResult{ + std::move(table), {}, _resultTable.getSharedLocalVocab()}, + std::numeric_limits::max())); } }; From bfc9d8f8a4e0026c3718994cdea295db51432407 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:17:48 +0200 Subject: [PATCH 085/133] Remove complicated atomic mutex check mechanism --- src/util/CacheableGenerator.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 648b098698..0a14232c8f 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -4,13 +4,10 @@ #pragma once -#include - #include #include #include #include -#include #include #include "util/Exception.h" @@ -43,7 +40,6 @@ class CacheableGenerator { std::optional generatorIterator_{}; std::vector> cachedValues_{}; MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; - std::atomic currentOwningThread{}; SizeCounter sizeCounter_{}; std::atomic currentSize_ = 0; uint64_t maxSize_ = std::numeric_limits::max(); @@ -61,9 +57,6 @@ class CacheableGenerator { private: void advanceTo(size_t index, bool isMaster) { std::unique_lock lock{mutex_}; - currentOwningThread = std::this_thread::get_id(); - absl::Cleanup cleanup{ - [this]() { currentOwningThread = std::thread::id{}; }}; AD_CONTRACT_CHECK(index <= cachedValues_.size()); // Make sure master iterator does exist and we're not blocking // indefinitely @@ -143,10 +136,7 @@ class CacheableGenerator { void setOnSizeChanged( std::function)> onSizeChanged) noexcept { - std::unique_lock lock{mutex_, std::defer_lock}; - if (currentOwningThread != std::this_thread::get_id()) { - lock.lock(); - } + std::unique_lock lock{mutex_}; onSizeChanged_ = std::move(onSizeChanged); } From 564d54b48b53bceb1cb600c8ca54a7b77706528f Mon Sep 17 00:00:00 2001 From: RobinTF 
<83676088+RobinTF@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:37:51 +0200 Subject: [PATCH 086/133] Avoid copy --- src/util/IteratorWrapper.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h index 6c6046f608..745fb5815e 100644 --- a/src/util/IteratorWrapper.h +++ b/src/util/IteratorWrapper.h @@ -6,10 +6,13 @@ #include +#include "Exception.h" + namespace ad_utility { template class IteratorWrapper { + bool used_ = false; OriginalIterable& iterable_; std::tuple args_; @@ -18,8 +21,11 @@ class IteratorWrapper { : iterable_{iterator}, args_{std::move(args)...} {} auto begin() { - return std::apply([this](auto... args) { return iterable_.begin(args...); }, - args_); + AD_CONTRACT_CHECK(!used_); + used_ = true; + return std::apply( + [this](auto... args) { return iterable_.begin(std::move(args)...); }, + std::move(args_)); } auto end() { return iterable_.end(); } From d566d5bbb5a89361d0729b758035548afab286e6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 2 Aug 2024 00:49:19 +0200 Subject: [PATCH 087/133] Use timer class to simplify calls --- src/util/CacheableGenerator.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 0a14232c8f..70e81811ab 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -13,6 +13,7 @@ #include "util/Exception.h" #include "util/Generator.h" #include "util/Synchronized.h" +#include "util/Timer.h" #include "util/UniqueCleanup.h" namespace ad_utility { @@ -84,22 +85,21 @@ class CacheableGenerator { }); return; } - auto start = std::chrono::steady_clock::now(); + Timer timer{Timer::Started}; if (generatorIterator_.has_value()) { AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); ++generatorIterator_.value(); } else { generatorIterator_ = generator_.begin(); } - auto stop = 
std::chrono::steady_clock::now(); + timer.stop(); if (generatorIterator_.value() != generator_.end()) { auto pointer = std::make_shared(std::move(*generatorIterator_.value())); currentSize_.fetch_add(sizeCounter_(*pointer)); cachedValues_.push_back(std::move(pointer)); if (onSizeChanged_) { - onSizeChanged_(std::chrono::duration_cast( - stop - start)); + onSizeChanged_(std::chrono::milliseconds{timer.msecs()}); } tryShrinkCacheIfNeccessary(); } From a164ef4a529e940f74e3f524d941a1b324c083b1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 2 Aug 2024 00:50:01 +0200 Subject: [PATCH 088/133] Start adding some documentation --- src/util/CacheableGenerator.h | 6 ++++++ src/util/IteratorWrapper.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 70e81811ab..00fc9e5e1d 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -18,13 +18,19 @@ namespace ad_utility { +/// Custom exception type that indicates the consumer took too long to consume +/// the generator. class IteratorExpired : public std::exception {}; +/// Lambda-like type that always returns 1 to indicate size 1 for every element +/// in the `CacheableGenerator`. template struct DefaultSizeCounter { uint64_t operator()(const std::remove_reference_t&) const { return 1; } }; +/// Range-like type that allows multiple consumers to consume the same +/// single-consumption generator asynchronously. 
template &> SizeCounter = DefaultSizeCounter> diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h index 745fb5815e..2d177fb7b3 100644 --- a/src/util/IteratorWrapper.h +++ b/src/util/IteratorWrapper.h @@ -10,6 +10,11 @@ namespace ad_utility { +/// Helper class allowing to use range-like datastructures with arguments for +/// their begin() member function within range-based for loops like this: +/// +/// This calls something.begin(1, 2, 3): +/// for (auto elem : IteratorWrapper{something, 1, 2, 3}) {} template class IteratorWrapper { bool used_ = false; From 23713cc0b4314403208c4d0c1fb12c0e468f9fcf Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 2 Aug 2024 00:52:37 +0200 Subject: [PATCH 089/133] Unlock before notifying --- src/util/CacheableGenerator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 00fc9e5e1d..c84aabf0cc 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -110,6 +110,7 @@ class CacheableGenerator { tryShrinkCacheIfNeccessary(); } if (isMaster) { + lock.unlock(); conditionVariable_.notify_all(); } } From 603a48bec2ce43aa0ac922705ba8d9ae188d2be0 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 2 Aug 2024 19:01:29 +0200 Subject: [PATCH 090/133] Adjust Cache to simplify code a bit --- src/util/Cache.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/util/Cache.h b/src/util/Cache.h index 5d59069aa9..2de3a307a2 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -237,7 +237,7 @@ class FlexibleCache { return; } auto newSize = _valueSizeGetter(*(*this)[key]); - auto& oldSize = _sizeMap.at(key); + auto& sizeInMap = _sizeMap.at(key); // Entry has grown too big to completely keep within the cache or we can't // fit it in the cache if (_maxSizeSingleEntry < newSize || @@ -246,12 +246,12 @@ class FlexibleCache { 
return; } - if (newSize >= oldSize) { - _totalSizeNonPinned += newSize - oldSize; - } else { - _totalSizeNonPinned -= oldSize - newSize; - } - oldSize = newSize; + // `MemorySize` type does not allow for negative values, but they are safe + // here, so we do it in size_t instead and convert back. + _totalSizeNonPinned = + MemorySize::bytes(_totalSizeNonPinned.getBytes() - + sizeInMap.getBytes() + newSize.getBytes()); + sizeInMap = newSize; if (_totalSizePinned <= _maxSize) { makeRoomIfFits(0_B); } From c7d58a419127c85c20231336dd027f3632210b6a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:25:34 +0200 Subject: [PATCH 091/133] Rework code (again) so that generator does not get cached --- src/engine/Operation.cpp | 104 +++---- src/engine/Operation.h | 7 +- src/engine/QueryExecutionContext.h | 31 +- src/engine/QueryExecutionTree.cpp | 7 +- src/engine/Result.cpp | 180 +++-------- src/engine/Result.h | 67 +--- src/index/CompressedRelation.cpp | 11 +- src/util/Cache.h | 58 +--- src/util/CacheableGenerator.h | 261 ++-------------- src/util/ConcurrentCache.h | 64 ++-- src/util/IteratorWrapper.h | 39 --- test/CMakeLists.txt | 2 - test/CacheTest.cpp | 184 ----------- test/CacheableGeneratorTest.cpp | 408 +------------------------ test/ConcurrentCacheTest.cpp | 42 ++- test/ExportQueryExecutionTreesTest.cpp | 55 ++-- test/IteratorWrapperTest.cpp | 52 ---- test/SparqlDataTypesTest.cpp | 17 +- 18 files changed, 247 insertions(+), 1342 deletions(-) delete mode 100644 src/util/IteratorWrapper.h delete mode 100644 test/IteratorWrapperTest.cpp diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index c8dab08d9e..61d9660e15 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -127,57 +127,33 @@ ProtoResult Operation::runComputation(ad_utility::Timer& timer, // _____________________________________________________________________________ CacheValue Operation::runComputationAndTransformToCache( 
ad_utility::Timer& timer, ComputationMode computationMode, - const std::string& cacheKey) { + const std::string& cacheKey, bool pinned) { auto& cache = _executionContext->getQueryTreeCache(); - CacheableResult result{runComputation(timer, computationMode), - cache.getMaxSizeSingleEntry().getBytes()}; - if (!result.isDataEvaluated()) { - result.setOnSizeChanged( - [&cache, cacheKey, runtimeInfo = getRuntimeInfoPointer()]( - std::optional duration) { - cache.recomputeSize(cacheKey); - if (duration.has_value()) { - runtimeInfo->totalTime_ += duration.value(); - } - }); - } - return CacheValue{std::move(result), runtimeInfo()}; -} - -// _____________________________________________________________________________ -Result Operation::extractFromCache( - std::shared_ptr result, bool freshlyInserted, - bool isRoot, ComputationMode computationMode) { - if (result->isDataEvaluated()) { - auto resultNumRows = result->idTable().size(); - auto resultNumCols = result->idTable().numColumns(); - LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " - << resultNumCols << std::endl; - } - - if (result->isDataEvaluated()) { - return Result::createResultWithFullyEvaluatedIdTable(std::move(result)); - } - - if (freshlyInserted) { - return Result::createResultAsMasterConsumer( - std::move(result), - isRoot ? std::function{[this]() { signalQueryUpdate(); }} - : std::function{}); - } - // TODO timer does not make sense here. 
- ad_utility::Timer timer{ad_utility::Timer::Started}; - return Result::createResultWithFallback( - std::move(result), - [this, timer = std::move(timer), computationMode]() mutable { - return runComputation(timer, computationMode); + auto result = Result::fromProtoResult( + runComputation(timer, computationMode), + [&cache](const IdTable& idTable) { + return cache.getMaxSizeSingleEntry() >= CacheValue::getSize(idTable); }, - [this, isRoot](auto duration) { + [this, &cache, cacheKey, pinned](Result aggregatedResult) { + cache.tryInsertIfNotPresent( + pinned, cacheKey, + CacheValue{std::move(aggregatedResult), runtimeInfo()}); + }); + /* + TODO incorporate time calculations and query updates. runtimeInfo().totalTime_ += duration; if (isRoot) { signalQueryUpdate(); } - }); + */ + if (result.isDataEvaluated()) { + auto resultNumRows = result.idTable().size(); + auto resultNumCols = result.idTable().numColumns(); + LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " + << resultNumCols << std::endl; + } + + return CacheValue{std::move(result), runtimeInfo()}; } // ________________________________________________________________________ @@ -212,46 +188,34 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnFailure(timer.msecs()); } }); - bool actuallyComputed = false; - auto cacheSetup = [this, &timer, computationMode, &actuallyComputed, - &cacheKey]() { - actuallyComputed = true; - return runComputationAndTransformToCache(timer, computationMode, - cacheKey); + auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult]() { + return runComputationAndTransformToCache(timer, computationMode, cacheKey, + pinResult); }; - using ad_utility::CachePolicy; + auto suitedForCache = [](const CacheValue& cacheValue) { + return cacheValue.resultTable().isDataEvaluated(); + }; - CachePolicy cachePolicy = computationMode == ComputationMode::ONLY_IF_CACHED - ? 
CachePolicy::neverCompute - : CachePolicy::computeOnDemand; + bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; auto result = - pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, cachePolicy) - : cache.computeOnce(cacheKey, cacheSetup, cachePolicy); + pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, + onlyReadFromCache, suitedForCache) + : cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache, + suitedForCache); if (result._resultPointer == nullptr) { - AD_CORRECTNESS_CHECK(cachePolicy == CachePolicy::neverCompute); + AD_CORRECTNESS_CHECK(onlyReadFromCache); return nullptr; } - if (!result._resultPointer->resultTable().isDataEvaluated() && - computationMode == ComputationMode::FULLY_MATERIALIZED) { - AD_CORRECTNESS_CHECK(!actuallyComputed); - result = pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, - CachePolicy::alwaysCompute) - : cache.computeOnce(cacheKey, cacheSetup, - CachePolicy::alwaysCompute); - } - updateRuntimeInformationOnSuccess( result, result._resultPointer->resultTable().isDataEvaluated() ? 
timer.msecs() : result._resultPointer->runtimeInfo().totalTime_); - return std::make_shared( - extractFromCache(result._resultPointer->resultTablePtr(), - actuallyComputed, isRoot, computationMode)); + return result._resultPointer->resultTablePtr(); } catch (ad_utility::CancellationException& e) { e.setOperation(getDescriptor()); runtimeInfo().status_ = RuntimeInformation::Status::cancelled; diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 242355831e..2eb70d647e 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -265,11 +265,8 @@ class Operation { CacheValue runComputationAndTransformToCache(ad_utility::Timer& timer, ComputationMode computationMode, - const std::string& cacheKey); - - Result extractFromCache(std::shared_ptr result, - bool freshlyInserted, bool isRoot, - ComputationMode computationMode); + const std::string& cacheKey, + bool pinned); // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. 
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index b7b54af6bf..0c17ea684b 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -22,13 +22,12 @@ class CacheValue { private: - std::shared_ptr resultTable_; + std::shared_ptr result_; RuntimeInformation runtimeInfo_; public: - explicit CacheValue(CacheableResult resultTable, - RuntimeInformation runtimeInfo) - : resultTable_{std::make_shared(std::move(resultTable))}, + explicit CacheValue(Result result, RuntimeInformation runtimeInfo) + : result_{std::make_shared(std::move(result))}, runtimeInfo_{std::move(runtimeInfo)} {} CacheValue(CacheValue&&) = default; @@ -36,34 +35,26 @@ class CacheValue { CacheValue& operator=(CacheValue&&) = default; CacheValue& operator=(const CacheValue&) = delete; - const CacheableResult& resultTable() const noexcept { return *resultTable_; } + const Result& resultTable() const noexcept { return *result_; } - std::shared_ptr resultTablePtr() const noexcept { - return resultTable_; + std::shared_ptr resultTablePtr() const noexcept { + return result_; } const RuntimeInformation& runtimeInfo() const noexcept { return runtimeInfo_; } - ~CacheValue() { - if (resultTable_ && !resultTable_->isDataEvaluated()) { - // Clear listeners - try { - resultTable_->setOnSizeChanged({}); - } catch (...) { - // Should never happen. The listeners only throw assertion errors - // if the result is evaluated. - std::exit(1); - } - } + static ad_utility::MemorySize getSize(const IdTable& idTable) { + return ad_utility::MemorySize::bytes(idTable.size() * idTable.numColumns() * + sizeof(Id)); } // Calculates the `MemorySize` taken up by an instance of `CacheValue`. 
struct SizeGetter { ad_utility::MemorySize operator()(const CacheValue& cacheValue) const { - if (const auto& tablePtr = cacheValue.resultTable_; tablePtr) { - return tablePtr->getCurrentSize(); + if (const auto& resultPtr = cacheValue.result_; resultPtr) { + return getSize(resultPtr->idTable()); } else { return 0_B; } diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index dc018c8430..12392aeb6e 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -119,12 +119,7 @@ void QueryExecutionTree::readFromCache() { auto& cache = qec_->getQueryTreeCache(); auto res = cache.getIfContained(getCacheKey()); if (res.has_value()) { - auto resultTable = res->_resultPointer->resultTablePtr(); - if (resultTable->isDataEvaluated()) { - cachedResult_ = std::make_shared( - Result::createResultWithFullyEvaluatedIdTable( - std::move(resultTable))); - } + cachedResult_ = res->_resultPointer->resultTablePtr(); } } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index fe70f4479a..f530944f67 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -7,8 +7,8 @@ #include "engine/Result.h" #include "engine/LocalVocab.h" +#include "util/CacheableGenerator.h" #include "util/Exception.h" -#include "util/IteratorWrapper.h" #include "util/Log.h" #include "util/Timer.h" @@ -209,69 +209,61 @@ const IdTable& ProtoResult::idTable() const { return storage_.idTable(); } bool ProtoResult::isDataEvaluated() const noexcept { return storage_.isDataEvaluated(); } -// _____________________________________________________________________________ -CacheableResult::CacheableResult(ProtoResult protoResult, - uint64_t maxElementSize) - : storage_{StorageType{ - protoResult.isDataEvaluated() - ? 
decltype(StorageType::data_){std::move( - protoResult.storage_.idTable())} - : decltype(StorageType::data_){ad_utility::CacheableGenerator< - IdTable, SizeCalculator>{ - std::move(protoResult.storage_.idTables())}}, - std::move(protoResult.storage_.sortedBy_), - std::move(protoResult.storage_.localVocab_), - }} { - if (!storage_.isDataEvaluated()) { - storage_.idTables().setMaxSize(maxElementSize); - } -} - -// _____________________________________________________________________________ -void CacheableResult::setOnSizeChanged( - std::function duration)> - onSizeChanged) { - storage_.idTables().setOnSizeChanged(std::move(onSizeChanged)); -} - -// _____________________________________________________________________________ -const IdTable& CacheableResult::idTable() const { return storage_.idTable(); } - -// _____________________________________________________________________________ -const ad_utility::CacheableGenerator& -CacheableResult::idTables() const { - return storage_.idTables(); -} // _____________________________________________________________________________ -bool CacheableResult::isDataEvaluated() const noexcept { - return storage_.isDataEvaluated(); -} - -// _____________________________________________________________________________ -ad_utility::MemorySize CacheableResult::getCurrentSize() const { - return ad_utility::MemorySize::bytes( - storage_.isDataEvaluated() ? 
SizeCalculator{}(storage_.idTable()) - : storage_.idTables().getCurrentSize()); -} - -// _____________________________________________________________________________ -Result::Result(std::shared_ptr idTable, - std::vector sortedBy, LocalVocabPtr localVocab) +Result::Result(IdTable idTable, std::vector sortedBy, + LocalVocabPtr localVocab) : storage_{StorageType{std::move(idTable), std::move(sortedBy), std::move(localVocab)}} {} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, +Result::Result(cppcoro::generator idTables, std::vector sortedBy, LocalVocabPtr localVocab) : storage_{StorageType{std::move(idTables), std::move(sortedBy), std::move(localVocab)}} {} // _____________________________________________________________________________ -const IdTable& Result::idTable() const { return *storage_.idTable(); } +Result Result::fromProtoResult(ProtoResult protoResult, + std::function fitsInCache, + std::function storeInCache) { + if (protoResult.isDataEvaluated()) { + return Result{std::move(protoResult.storage_.idTable()), + std::move(protoResult.storage_.sortedBy_), + std::move(protoResult.storage_.localVocab_)}; + } + auto sortedByCopy = protoResult.storage_.sortedBy_; + auto localVocabReference = protoResult.storage_.localVocab_; + return Result{ + ad_utility::wrapGeneratorWithCache( + std::move(protoResult.storage_.idTables()), + [fitsInCache = std::move(fitsInCache)]( + std::optional& aggregate, const IdTable& newTable) { + if (aggregate.has_value()) { + aggregate.value().insertAtEnd(newTable); + } else { + aggregate.emplace(newTable.clone()); + } + return fitsInCache(aggregate.value()); + }, + [storeInCache = std::move(storeInCache), + sortedByCopy = std::move(sortedByCopy), + localVocabReference = std::move(localVocabReference)]( + std::optional idTable) mutable { + if (idTable.has_value()) { + storeInCache(Result{std::move(idTable).value(), + std::move(sortedByCopy), + 
std::move(localVocabReference)}); + } + }), + std::move(protoResult.storage_.sortedBy_), + std::move(protoResult.storage_.localVocab_)}; +} + +// _____________________________________________________________________________ +const IdTable& Result::idTable() const { return storage_.idTable(); } // _____________________________________________________________________________ -cppcoro::generator& Result::idTables() const { +cppcoro::generator& Result::idTables() const { return storage_.idTables(); } @@ -312,89 +304,3 @@ string Result::asDebugString() const { } return std::move(os).str(); } - -// _____________________________________________________________________________ -Result Result::createResultWithFullyEvaluatedIdTable( - std::shared_ptr cacheableResult) { - AD_CONTRACT_CHECK(cacheableResult->isDataEvaluated()); - auto sortedBy = cacheableResult->storage_.sortedBy_; - auto localVocab = cacheableResult->storage_.localVocab_; - const IdTable* tablePointer = &cacheableResult->idTable(); - return Result{ - std::shared_ptr{std::move(cacheableResult), tablePointer}, - std::move(sortedBy), std::move(localVocab)}; -} - -// _____________________________________________________________________________ -Result Result::createResultWithFallback( - std::shared_ptr original, - std::function fallback, - std::function onIteration) { - AD_CONTRACT_CHECK(!original->isDataEvaluated()); - auto generator = [](std::shared_ptr sharedResult, - std::function fallback, - auto onIteration) -> cppcoro::generator { - size_t index = 0; - try { - for (auto&& idTable : sharedResult->storage_.idTables()) { - co_yield *idTable; - index++; - } - co_return; - } catch (const ad_utility::IteratorExpired&) { - // co_yield is not allowed here, so simply ignore this and allow control - // flow to take over - } catch (...) { - throw; - } - ProtoResult freshResult = fallback(); - // If data is evaluated this means that this process is not deterministic - // or that there's a wrong callback used here. 
- AD_CORRECTNESS_CHECK(!freshResult.isDataEvaluated()); - auto start = std::chrono::steady_clock::now(); - for (auto&& idTable : freshResult.storage_.idTables()) { - auto stop = std::chrono::steady_clock::now(); - if (onIteration) { - onIteration(std::chrono::duration_cast( - stop - start)); - } - if (index > 0) { - index--; - continue; - } - co_yield idTable; - start = std::chrono::steady_clock::now(); - } - auto stop = std::chrono::steady_clock::now(); - if (onIteration) { - onIteration( - std::chrono::duration_cast(stop - start)); - } - }; - return Result{ - generator(original, std::move(fallback), std::move(onIteration)), - original->storage_.sortedBy_, original->storage_.localVocab_}; -} - -// _____________________________________________________________________________ -Result Result::createResultAsMasterConsumer( - std::shared_ptr original, - std::function onIteration) { - AD_CONTRACT_CHECK(!original->isDataEvaluated()); - auto generator = [](auto original, - auto onIteration) -> cppcoro::generator { - using ad_utility::IteratorWrapper; - auto& generator = original->storage_.idTables(); - for (std::shared_ptr idTable : - IteratorWrapper{generator, true}) { - if (onIteration) { - onIteration(); - } - co_yield *idTable; - } - }; - auto sortedBy = original->storage_.sortedBy_; - auto localVocab = original->storage_.localVocab_; - return Result{generator(std::move(original), std::move(onIteration)), - std::move(sortedBy), std::move(localVocab)}; -} diff --git a/src/engine/Result.h b/src/engine/Result.h index 25173c6375..a7cf837320 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -20,7 +20,6 @@ template class ResultStorage { friend class ProtoResult; - friend class CacheableResult; friend class Result; using Data = std::variant; @@ -67,7 +66,6 @@ class ResultStorage { }; class ProtoResult { - friend class CacheableResult; friend class Result; using StorageType = ResultStorage>; StorageType storage_; @@ -94,7 +92,6 @@ class ProtoResult { explicit 
SharedLocalVocabWrapper(LocalVocabPtr localVocab) : localVocab_{std::move(localVocab)} {} friend ProtoResult; - friend class CacheableResult; friend class Result; public: @@ -170,59 +167,21 @@ class ProtoResult { bool isDataEvaluated() const noexcept; }; -class CacheableResult { - friend class Result; - - struct SizeCalculator { - uint64_t operator()(const IdTable& idTable) const { - return idTable.size() * idTable.numColumns() * sizeof(Id); - } - }; - - using StorageType = - ResultStorage>; - StorageType storage_; - - public: - CacheableResult(const CacheableResult& other) = delete; - CacheableResult& operator=(const CacheableResult& other) = delete; - - CacheableResult(CacheableResult&& other) = default; - CacheableResult& operator=(CacheableResult&& other) = default; - - CacheableResult(ProtoResult protoResult, uint64_t maxElementSize); - - void setOnSizeChanged( - std::function)> - onSizeChanged); - - const IdTable& idTable() const; - - const ad_utility::CacheableGenerator& idTables() - const; - - bool isDataEvaluated() const noexcept; - - ad_utility::MemorySize getCurrentSize() const; -}; - // The result of an `Operation`. This is the class QLever uses for all // intermediate or final results when processing a SPARQL query. The actual data // is always a table and contained in the member `idTable()`. 
class Result { private: - using StorageType = ResultStorage, - cppcoro::generator>; + using StorageType = ResultStorage>; mutable StorageType storage_; using LocalVocabPtr = std::shared_ptr; using SharedLocalVocabWrapper = ProtoResult::SharedLocalVocabWrapper; - Result(std::shared_ptr idTable, - std::vector sortedBy, LocalVocabPtr localVocab); - Result(cppcoro::generator idTables, + Result(IdTable idTable, std::vector sortedBy, + LocalVocabPtr localVocab); + Result(cppcoro::generator idTables, std::vector sortedBy, LocalVocabPtr localVocab); public: @@ -234,11 +193,15 @@ class Result { Result(Result&& other) = default; Result& operator=(Result&& other) = default; + static Result fromProtoResult(ProtoResult protoResult, + std::function fitsInCache, + std::function storeInCache); + // Const access to the underlying `IdTable`. const IdTable& idTable() const; // Access to the underlying `IdTable`s. - cppcoro::generator& idTables() const; + cppcoro::generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { @@ -295,16 +258,4 @@ class Result { // The first rows of the result and its total size (for debugging). 
string asDebugString() const; - - static Result createResultWithFullyEvaluatedIdTable( - std::shared_ptr cacheableResult); - - static Result createResultWithFallback( - std::shared_ptr original, - std::function fallback, - std::function onIteration); - - static Result createResultAsMasterConsumer( - std::shared_ptr original, - std::function onIteration); }; diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 482e8d9ffb..493eef57c4 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -395,11 +395,12 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_; auto sharedResultFromCache = blockCache_ - .computeOnce(cacheKey, - [&]() { - return readAndDecompressBlock(blockMetadata, - allColumns); - }) + .computeOnce( + cacheKey, + [&]() { + return readAndDecompressBlock(blockMetadata, allColumns); + }, + false, [](const auto&) { return true; }) ._resultPointer; const DecompressedBlock& block = *sharedResultFromCache; diff --git a/src/util/Cache.h b/src/util/Cache.h index 2de3a307a2..23750eea16 100644 --- a/src/util/Cache.h +++ b/src/util/Cache.h @@ -6,8 +6,6 @@ #pragma once -#include - #include #include #include @@ -170,8 +168,7 @@ class FlexibleCache { return {}; } Score s = _scoreCalculator(*valPtr); - _totalSizeNonPinned += sizeOfNewEntry; - _sizeMap.emplace(key, sizeOfNewEntry); + _totalSizeNonPinned += _valueSizeGetter(*valPtr); auto handle = _entries.insert(std::move(s), Entry(key, std::move(valPtr))); _accessMap[key] = handle; // The first value is the value part of the key-value pair in the priority @@ -201,8 +198,7 @@ class FlexibleCache { // Make room for the new entry. 
makeRoomIfFits(sizeOfNewEntry); _pinnedMap[key] = valPtr; - _totalSizePinned += sizeOfNewEntry; - _sizeMap.emplace(key, sizeOfNewEntry); + _totalSizePinned += _valueSizeGetter(*valPtr); return valPtr; } @@ -230,33 +226,6 @@ class FlexibleCache { return _maxSizeSingleEntry; } - void recomputeSize(const Key& key) { - // Pinned entries must not be dynamic in nature - AD_CONTRACT_CHECK(!containsPinned(key)); - if (!containsNonPinned(key)) { - return; - } - auto newSize = _valueSizeGetter(*(*this)[key]); - auto& sizeInMap = _sizeMap.at(key); - // Entry has grown too big to completely keep within the cache or we can't - // fit it in the cache - if (_maxSizeSingleEntry < newSize || - _maxSize - std::min(_totalSizePinned, _maxSize) < newSize) { - erase(key); - return; - } - - // `MemorySize` type does not allow for negative values, but they are safe - // here, so we do it in size_t instead and convert back. - _totalSizeNonPinned = - MemorySize::bytes(_totalSizeNonPinned.getBytes() - - sizeInMap.getBytes() + newSize.getBytes()); - sizeInMap = newSize; - if (_totalSizePinned <= _maxSize) { - makeRoomIfFits(0_B); - } - } - //! Checks if there is an entry with the given key. 
bool contains(const Key& key) const { return containsPinned(key) || containsNonPinned(key); @@ -281,7 +250,7 @@ class FlexibleCache { const ValuePtr valuePtr = handle.value().value(); // adapt the sizes of the pinned and non-pinned part of the cache - auto sz = _sizeMap.at(key); + auto sz = _valueSizeGetter(*valuePtr); _totalSizeNonPinned -= sz; _totalSizePinned += sz; // Move the entry to the _pinnedMap and remove it from the non-pinned data @@ -297,8 +266,7 @@ class FlexibleCache { void erase(const Key& key) { const auto pinnedIt = _pinnedMap.find(key); if (pinnedIt != _pinnedMap.end()) { - _totalSizePinned -= _sizeMap.at(key); - _sizeMap.erase(key); + _totalSizePinned -= _valueSizeGetter(*pinnedIt->second); _pinnedMap.erase(pinnedIt); return; } @@ -309,8 +277,7 @@ class FlexibleCache { return; } // the entry exists in the non-pinned part of the cache, erase it. - _totalSizeNonPinned -= _sizeMap.at(key); - _sizeMap.erase(key); + _totalSizeNonPinned -= _valueSizeGetter(*mapIt->second); _entries.erase(std::move(mapIt->second)); _accessMap.erase(mapIt); } @@ -417,8 +384,8 @@ class FlexibleCache { void removeOneEntry() { AD_CONTRACT_CHECK(!_entries.empty()); auto handle = _entries.pop(); - _totalSizeNonPinned -= _sizeMap.at(handle.value().key()); - _sizeMap.erase(handle.value().key()); + _totalSizeNonPinned = + _totalSizeNonPinned - _valueSizeGetter(*handle.value().value()); _accessMap.erase(handle.value().key()); } size_t _maxNumEntries; @@ -434,17 +401,6 @@ class FlexibleCache { ValueSizeGetter _valueSizeGetter; PinnedMap _pinnedMap; AccessMap _accessMap; - SizeMap _sizeMap; - - FRIEND_TEST(LRUCacheTest, - verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased); - - FRIEND_TEST(LRUCacheTest, - verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned); - FRIEND_TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed); - FRIEND_TEST(LRUCacheTest, - verifyNonPinnedEntriesAreRemovedToMakeRoomForResize); - FRIEND_TEST(LRUCacheTest, 
verifyRecomputeIsNoOpForNonExistentElement); }; // Partial instantiation of FlexibleCache using the heap-based priority queue diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index c84aabf0cc..32f456a727 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -4,255 +4,32 @@ #pragma once -#include -#include #include -#include -#include -#include "util/Exception.h" #include "util/Generator.h" -#include "util/Synchronized.h" -#include "util/Timer.h" -#include "util/UniqueCleanup.h" +#include "util/TypeTraits.h" namespace ad_utility { -/// Custom exception type that indicates the consumer took too long to consume -/// the generator. -class IteratorExpired : public std::exception {}; - -/// Lambda-like type that always returns 1 to indicate size 1 for every element -/// in the `CacheableGenerator`. template -struct DefaultSizeCounter { - uint64_t operator()(const std::remove_reference_t&) const { return 1; } -}; - -/// Range-like type that allows multiple consumers to consume the same -/// single-consumption generator asynchronously. 
-template &> - SizeCounter = DefaultSizeCounter> -class CacheableGenerator { - using GenIterator = typename cppcoro::generator::iterator; - - enum class MasterIteratorState { NOT_STARTED, MASTER_STARTED, MASTER_DONE }; - - class ComputationStorage { - friend CacheableGenerator; - mutable std::shared_mutex mutex_; - std::condition_variable_any conditionVariable_; - cppcoro::generator generator_; - std::optional generatorIterator_{}; - std::vector> cachedValues_{}; - MasterIteratorState masterState_ = MasterIteratorState::NOT_STARTED; - SizeCounter sizeCounter_{}; - std::atomic currentSize_ = 0; - uint64_t maxSize_ = std::numeric_limits::max(); - std::function)> - onSizeChanged_{}; - - public: - explicit ComputationStorage(cppcoro::generator generator) - : generator_{std::move(generator)} {} - ComputationStorage(ComputationStorage&& other) = delete; - ComputationStorage(const ComputationStorage& other) = delete; - ComputationStorage& operator=(ComputationStorage&& other) = delete; - ComputationStorage& operator=(const ComputationStorage& other) = delete; - - private: - void advanceTo(size_t index, bool isMaster) { - std::unique_lock lock{mutex_}; - AD_CONTRACT_CHECK(index <= cachedValues_.size()); - // Make sure master iterator does exist and we're not blocking - // indefinitely - if (isMaster) { - AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::MASTER_DONE); - masterState_ = MasterIteratorState::MASTER_STARTED; - } else { - AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::NOT_STARTED); - } - if (index < cachedValues_.size()) { - if (!cachedValues_.at(index)) { - throw IteratorExpired{}; - } - return; - } - if (generatorIterator_.has_value() && - generatorIterator_.value() == generator_.end()) { - return; - } - if (masterState_ == MasterIteratorState::MASTER_STARTED && !isMaster) { - conditionVariable_.wait(lock, [this, index]() { - return (generatorIterator_.has_value() && - generatorIterator_.value() == generator_.end()) || - index < 
cachedValues_.size(); - }); - return; - } - Timer timer{Timer::Started}; - if (generatorIterator_.has_value()) { - AD_CONTRACT_CHECK(generatorIterator_.value() != generator_.end()); - ++generatorIterator_.value(); - } else { - generatorIterator_ = generator_.begin(); - } - timer.stop(); - if (generatorIterator_.value() != generator_.end()) { - auto pointer = - std::make_shared(std::move(*generatorIterator_.value())); - currentSize_.fetch_add(sizeCounter_(*pointer)); - cachedValues_.push_back(std::move(pointer)); - if (onSizeChanged_) { - onSizeChanged_(std::chrono::milliseconds{timer.msecs()}); - } - tryShrinkCacheIfNeccessary(); - } - if (isMaster) { - lock.unlock(); - conditionVariable_.notify_all(); - } - } - - std::shared_ptr getCachedValue(size_t index) const { - std::shared_lock lock{mutex_}; - if (!cachedValues_.at(index)) { - throw IteratorExpired{}; - } - return cachedValues_.at(index); - } - - // Needs to be public in order to compile with gcc 11 & 12 - public: - bool isDone(size_t index) noexcept { - std::shared_lock lock{mutex_}; - return index >= cachedValues_.size() && generatorIterator_.has_value() && - generatorIterator_.value() == generator_.end(); - } - - private: - void clearMaster() { - std::unique_lock lock{mutex_}; - AD_CORRECTNESS_CHECK(masterState_ != MasterIteratorState::MASTER_DONE); - masterState_ = MasterIteratorState::MASTER_DONE; - lock.unlock(); - conditionVariable_.notify_all(); - } - - void setOnSizeChanged( - std::function)> - onSizeChanged) noexcept { - std::unique_lock lock{mutex_}; - onSizeChanged_ = std::move(onSizeChanged); - } - - void tryShrinkCacheIfNeccessary() { - if (currentSize_ <= maxSize_) { - return; - } - size_t maxBound = cachedValues_.size() - 1; - for (size_t i = 0; i < maxBound; i++) { - auto& pointer = cachedValues_.at(i); - if (pointer) { - currentSize_.fetch_add(sizeCounter_(*pointer)); - pointer.reset(); - if (onSizeChanged_) { - onSizeChanged_(std::nullopt); - } - if (currentSize_ <= maxSize_ || i >= 
maxBound - 1) { - break; - } - } - } - } - - void setMaxSize(uint64_t maxSize) { - std::unique_lock lock{mutex_}; - maxSize_ = maxSize; - } - }; - - std::shared_ptr computationStorage_; - - public: - explicit CacheableGenerator(cppcoro::generator generator) - : computationStorage_{ - std::make_shared(std::move(generator))} {} - - CacheableGenerator(CacheableGenerator&& other) noexcept = default; - CacheableGenerator(const CacheableGenerator& other) noexcept = delete; - CacheableGenerator& operator=(CacheableGenerator&& other) noexcept = default; - CacheableGenerator& operator=(const CacheableGenerator& other) noexcept = - delete; - - class IteratorSentinel {}; - - class Iterator { - size_t currentIndex_ = 0; - unique_cleanup::UniqueCleanup> storage_; - bool isMaster_; - - auto storage() const { - auto pointer = storage_->lock(); - AD_CORRECTNESS_CHECK(pointer); - return pointer; - } - - public: - explicit Iterator(std::weak_ptr storage, bool isMaster) - : storage_{std::move(storage), - [isMaster](auto&& storage) { - if (isMaster) { - auto pointer = storage.lock(); - AD_CORRECTNESS_CHECK(pointer); - pointer->clearMaster(); - } - }}, - isMaster_{isMaster} { - this->storage()->advanceTo(currentIndex_, isMaster); - } - - friend bool operator==(const Iterator& it, IteratorSentinel) noexcept { - return it.storage()->isDone(it.currentIndex_); - } - - friend bool operator==(IteratorSentinel s, const Iterator& it) noexcept { - return (it == s); - } - - Iterator& operator++() { - ++currentIndex_; - storage()->advanceTo(currentIndex_, isMaster_); - return *this; - } - - // Need to provide post-increment operator to implement the 'Range' concept. 
- void operator++(int) { (void)operator++(); } - - std::shared_ptr operator*() const { - return storage()->getCachedValue(currentIndex_); - } - }; - - Iterator begin(bool isMaster = false) const { - return Iterator{computationStorage_, isMaster}; +cppcoro::generator wrapGeneratorWithCache( + cppcoro::generator generator, + InvocableWithExactReturnType&, const T&> auto + aggregator, + InvocableWithExactReturnType> auto onFullyCached) { + std::optional aggregatedData{}; + bool aggregate = true; + for (auto&& element : generator) { + if (aggregate) { + aggregate = aggregator(aggregatedData, element); + if (!aggregate) { + aggregatedData.reset(); + } + } + co_yield AD_FWD(element); } - - IteratorSentinel end() const noexcept { return IteratorSentinel{}; } - - void setOnSizeChanged( - std::function)> - onSizeChanged) noexcept { - computationStorage_->setOnSizeChanged(std::move(onSizeChanged)); - } - - uint64_t getCurrentSize() const { - return computationStorage_->currentSize_.load(); - } - - void setMaxSize(uint64_t maxSize) { - computationStorage_->setMaxSize(maxSize); + if (aggregate) { + onFullyCached(std::move(aggregatedData)); } -}; +} }; // namespace ad_utility diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 88837063a3..bc61265c2d 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -43,12 +43,6 @@ enum struct CacheStatus { notInCacheAndNotComputed }; -enum class CachePolicy { - neverCompute, - computeOnDemand, - alwaysCompute, -}; - // Convert a `CacheStatus` to a human-readable string. We mostly use it for // JSON exports, so we use a hyphenated format. 
constexpr std::string_view toString(CacheStatus status) { @@ -131,7 +125,7 @@ class ResultInProgress { std::unique_lock uniqueLock(_mutex); _conditionVariable.wait(uniqueLock, [this] { return _status != Status::IN_PROGRESS; }); - if (_status == ResultInProgress::Status::ABORTED) { + if (_status == Status::ABORTED) { throw WaitedForResultWhichThenFailedException{}; } return _result; @@ -185,13 +179,19 @@ class ConcurrentCache { * it is contained in the cache. Otherwise `nullptr` with a cache status of * `notInCacheNotComputed` will be returned. * @return A shared_ptr to the computation result. + * @param suitedForCache Predicate function that will be applied to newly + * computed value to check if it is suited for caching. + * @return A `ResultAndCacheStatus` shared_ptr to the computation result. * */ ResultAndCacheStatus computeOnce( const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - CachePolicy cachePolicy = CachePolicy::computeOnDemand) { - return computeOnceImpl(false, key, computeFunction, cachePolicy); + bool onlyReadFromCache, + const InvocableWithConvertibleReturnType auto& + suitedForCache) { + return computeOnceImpl(false, key, computeFunction, onlyReadFromCache, + suitedForCache); } /// Similar to computeOnce, with the following addition: After the call @@ -199,12 +199,23 @@ class ConcurrentCache { ResultAndCacheStatus computeOncePinned( const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - CachePolicy cachePolicy = CachePolicy::computeOnDemand) { - return computeOnceImpl(true, key, computeFunction, cachePolicy); + bool onlyReadFromCache, + const InvocableWithConvertibleReturnType auto& + suitedForCache) { + return computeOnceImpl(true, key, computeFunction, onlyReadFromCache, + suitedForCache); } - void recomputeSize(const Key& key) { - _cacheAndInProgressMap.wlock()->_cache.recomputeSize(key); + void tryInsertIfNotPresent(bool pinned, const Key& key, Value value) { + auto lockPtr = 
_cacheAndInProgressMap.wlock(); + if (pinned) { + if (!lockPtr->_cache.containsAndMakePinnedIfExists(key)) { + lockPtr->_cache.insertPinned(key, + std::make_shared(std::move(value))); + } + } else if (!lockPtr->_cache.contains(key)) { + lockPtr->_cache.insert(key, std::make_shared(std::move(value))); + } } /// Clear the cache (but not the pinned entries) @@ -324,7 +335,9 @@ class ConcurrentCache { ResultAndCacheStatus computeOnceImpl( bool pinned, const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, - CachePolicy cachePolicy) { + bool onlyReadFromCache, + const InvocableWithConvertibleReturnType auto& + suitedForCache) { using std::make_shared; bool mustCompute; shared_ptr resultInProgress; @@ -341,10 +354,9 @@ class ConcurrentCache { if (contained) { // the result is in the cache, simply return it. return {cache[key], cacheStatus}; - } else if (cachePolicy == CachePolicy::neverCompute) { + } else if (onlyReadFromCache) { return {nullptr, CacheStatus::notInCacheAndNotComputed}; - } else if (lockPtr->_inProgress.contains(key) && - cachePolicy == CachePolicy::computeOnDemand) { + } else if (lockPtr->_inProgress.contains(key)) { // the result is not cached, but someone else is computing it. // it is important, that we do not immediately call getResult() since // this call blocks and we currently hold a lock. @@ -368,7 +380,11 @@ class ConcurrentCache { try { // The actual computation shared_ptr result = make_shared(computeFunction()); - moveFromInProgressToCache(key, result); + if (suitedForCache(*result)) { + moveFromInProgressToCache(key, result); + } else { + _cacheAndInProgressMap.wlock()->_inProgress.erase(key); + } // Signal other threads who are waiting for the results. resultInProgress->finish(result); // result was not cached @@ -384,7 +400,17 @@ class ConcurrentCache { // someone else is computing the result, wait till it is finished and // return the result, we do not count this case as "cached" as we had to // wait. 
- return {resultInProgress->getResult(), CacheStatus::computed}; + auto resultPointer = resultInProgress->getResult(); + if (resultPointer) { + return {std::move(resultPointer), CacheStatus::computed}; + } + // TODO there's a small chance this will hang indefinitely if + // other processes keep submitting non-cacheable entries before this + // thread can acquire the lock to compute the entry on its own. + + // Retry if computed entry unsuited for caching + return computeOnceImpl(pinned, key, computeFunction, false, + suitedForCache); } } diff --git a/src/util/IteratorWrapper.h b/src/util/IteratorWrapper.h deleted file mode 100644 index 2d177fb7b3..0000000000 --- a/src/util/IteratorWrapper.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Robin Textor-Falconi - -#pragma once - -#include - -#include "Exception.h" - -namespace ad_utility { - -/// Helper class allowing to use range-like datastructures with arguments for -/// their begin() member function within range-based for loops like this: -/// -/// This calls something.begin(1, 2, 3): -/// for (auto elem : IteratorWrapper{something, 1, 2, 3}) {} -template -class IteratorWrapper { - bool used_ = false; - OriginalIterable& iterable_; - std::tuple args_; - - public: - explicit IteratorWrapper(OriginalIterable& iterator, Args... args) - : iterable_{iterator}, args_{std::move(args)...} {} - - auto begin() { - AD_CONTRACT_CHECK(!used_); - used_ = true; - return std::apply( - [this](auto... 
args) { return iterable_.begin(std::move(args)...); }, - std::move(args_)); - } - - auto end() { return iterable_.end(); } -}; - -}; // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9e9c48dc96..cc859dbcf5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -395,8 +395,6 @@ addLinkAndDiscoverTest(FsstCompressorTest fsst) addLinkAndDiscoverTest(CopyableSynchronizationTest) -addLinkAndDiscoverTest(IteratorWrapperTest) - addLinkAndDiscoverTest(CacheableGeneratorTest) addLinkAndDiscoverTest(FilterTest) diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index fbdab252e5..de6491481c 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -13,11 +13,6 @@ using std::string; using namespace ad_utility::memory_literals; -using Vec = std::vector; -[[maybe_unused]] auto vectorSizeGetter = [](const auto& pointer) { - return pointer->size() * sizeof(int) * 1_B; -}; - // first some simple Tests for the general cache interface TEST(FlexibleCacheTest, Simple) { auto accessUpdater = [](const auto& s, [[maybe_unused]] const auto& v) { @@ -143,183 +138,4 @@ TEST(LRUCacheTest, testDecreasingCapacity) { ASSERT_FALSE(cache["3"]); ASSERT_FALSE(cache["4"]); } - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErased) { - LRUCache>, decltype(vectorSizeGetter)> - cache{1}; - - auto vecA = std::make_shared(); - - cache.insert(0, vecA); - - ASSERT_EQ(cache._totalSizeNonPinned, 0_B); - vecA->push_back(0); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizeNonPinned, 0_B); - - cache.erase(0); - - // Cache should not underflow - ASSERT_EQ(cache._totalSizeNonPinned, 0_B); - - cache.insert(0, vecA); - - ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - vecA->clear(); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - - cache.erase(0); - - // Cache correctly remove 
size, even though the vector is empty by now. - ASSERT_EQ(cache._totalSizeNonPinned, 0_B); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, - verifyCacheSizeIsCorrectlyTrackedWhenChangedWhenErasedPinned) { - LRUCache>, decltype(vectorSizeGetter)> - cache{1}; - - auto vecA = std::make_shared(); - - cache.insertPinned(0, vecA); - - ASSERT_EQ(cache._totalSizePinned, 0_B); - vecA->push_back(0); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizePinned, 0_B); - - cache.erase(0); - - // Cache should not underflow - ASSERT_EQ(cache._totalSizePinned, 0_B); - - cache.insertPinned(0, vecA); - - ASSERT_EQ(cache._totalSizePinned, 4_B); - vecA->clear(); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizePinned, 4_B); - - cache.erase(0); - - // Cache correctly remove size, even though the vector is empty by now. - ASSERT_EQ(cache._totalSizePinned, 0_B); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyCacheSizeIsCorrectlyRecomputed) { - LRUCache>, decltype(vectorSizeGetter)> - cache{3, 12_B, 8_B}; - - auto vecA = std::make_shared(0); - auto vecB = std::make_shared(1); - - cache.insert(0, vecA); - cache.insert(1, vecB); - - ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - - vecA->resize(1); - vecB->resize(2); - - // Cache does was not notified about the size change - ASSERT_EQ(cache._totalSizeNonPinned, 4_B); - - cache.recomputeSize(0); - - ASSERT_EQ(cache._totalSizeNonPinned, 8_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - vecA->resize(2); - - cache.recomputeSize(0); - - ASSERT_EQ(cache._totalSizeNonPinned, 12_B); - ASSERT_TRUE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - cache.recomputeSize(1); - - ASSERT_EQ(cache._totalSizeNonPinned, 8_B); - ASSERT_FALSE(cache.contains(0)); - ASSERT_TRUE(cache.contains(1)); - - vecB->resize(3); - 
cache.recomputeSize(1); - - ASSERT_EQ(cache._totalSizeNonPinned, 0_B); - ASSERT_FALSE(cache.contains(0)); - ASSERT_FALSE(cache.contains(1)); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyNonPinnedEntriesAreRemovedToMakeRoomForResize) { - LRUCache>, decltype(vectorSizeGetter)> - cache{3, 8_B, 4_B}; - - auto vecA = std::make_shared(1); - auto vecB = std::make_shared(1); - auto vecC = std::make_shared(0); - - cache.insertPinned(0, vecA); - cache.insert(1, vecB); - cache.insert(2, vecC); - - vecC->resize(1); - - cache.recomputeSize(2); - ASSERT_TRUE(cache.contains(0)); - ASSERT_FALSE(cache.contains(1)); - ASSERT_TRUE(cache.contains(2)); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyRecomputeIsNoOpForNonExistentElement) { - LRUCache> cache{1}; - cache.insert("1", "a"); - - cache.recomputeSize("2"); - - EXPECT_TRUE(cache.contains("1")); - EXPECT_FALSE(cache.contains("2")); -} - -TEST(LRUCacheTest, verifyRecomputeDoesNoticeExceedingSizeOnShrink) { - LRUCache>, decltype(vectorSizeGetter)> - cache{3, 32_B, 16_B}; - - auto vecA = std::make_shared(2); - auto vecB = std::make_shared(1); - auto vecC = std::make_shared(4); - - cache.insert(0, vecA); - cache.insert(1, vecB); - cache.insert(2, vecC); - - cache.setMaxSizeSingleEntry(8_B); - vecC->resize(3); - cache.recomputeSize(2); - - EXPECT_TRUE(cache.contains(0)); - EXPECT_TRUE(cache.contains(1)); - EXPECT_FALSE(cache.contains(2)); -} - -// _____________________________________________________________________________ -TEST(LRUCacheTest, verifyRecomputeDoesErrorOutWhenPinned) { - LRUCache cache{3, 12_B, - 8_B}; - - cache.insertPinned(0, 0); - - EXPECT_THROW(cache.recomputeSize(0), ad_utility::Exception); -} } // namespace ad_utility diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index 4ad86d1b2a..dc07ecfe60 100644 --- a/test/CacheableGeneratorTest.cpp +++ 
b/test/CacheableGeneratorTest.cpp @@ -4,13 +4,10 @@ #include -#include - #include "util/CacheableGenerator.h" #include "util/Generator.h" -#include "util/jthread.h" -using ad_utility::CacheableGenerator; +using ad_utility::wrapGeneratorWithCache; using cppcoro::generator; using namespace std::chrono_literals; @@ -20,394 +17,17 @@ generator testGenerator(uint32_t range) { } } -// _____________________________________________________________________________ -TEST(CacheableGenerator, allowsMultiConsumption) { - CacheableGenerator generator{testGenerator(3)}; - - auto iterator1 = generator.begin(true); - - ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(**iterator1, 0); - ++iterator1; - - ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(**iterator1, 1); - ++iterator1; - - ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(**iterator1, 2); - ++iterator1; - - EXPECT_EQ(iterator1, generator.end()); - - auto iterator2 = generator.begin(false); - - ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(**iterator2, 0); - ++iterator2; - - ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(**iterator2, 1); - ++iterator2; - - ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(**iterator2, 2); - ++iterator2; - EXPECT_EQ(iterator2, generator.end()); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, masterBlocksSlaves) { - CacheableGenerator generator{testGenerator(3)}; - - // Verify slave is not blocked indefinitely if master has not been started yet - EXPECT_THROW(generator.begin(false), ad_utility::Exception); - - auto masterIterator = generator.begin(true); - std::mutex counterMutex; - std::condition_variable cv; - std::atomic_int counter = 0; - uint32_t proceedStage = 0; - - ad_utility::JThread thread1{[&]() { - auto iterator = generator.begin(false); - - ASSERT_NE(iterator, generator.end()); - { - std::lock_guard guard{counterMutex}; - EXPECT_EQ(counter, 0); - proceedStage = 1; - } - cv.notify_all(); - - 
EXPECT_EQ(**iterator, 0); - ++iterator; - - ASSERT_NE(iterator, generator.end()); - { - std::lock_guard guard{counterMutex}; - EXPECT_EQ(counter, 1); - proceedStage = 2; - } - cv.notify_all(); - - EXPECT_EQ(**iterator, 1); - ++iterator; - - ASSERT_NE(iterator, generator.end()); - { - std::lock_guard guard{counterMutex}; - EXPECT_EQ(counter, 2); - proceedStage = 3; - } - cv.notify_all(); - - EXPECT_EQ(**iterator, 2); - ++iterator; - - EXPECT_EQ(iterator, generator.end()); - { - std::lock_guard guard{counterMutex}; - EXPECT_EQ(counter, 3); - } - }}; - - ad_utility::JThread thread2{[&]() { - auto iterator = generator.begin(false); - - ASSERT_NE(iterator, generator.end()); - EXPECT_GE(counter, 0); - - EXPECT_EQ(**iterator, 0); - ++iterator; - - ASSERT_NE(iterator, generator.end()); - EXPECT_GE(counter, 1); - - EXPECT_EQ(**iterator, 1); - ++iterator; - - ASSERT_NE(iterator, generator.end()); - EXPECT_GE(counter, 2); - - EXPECT_EQ(**iterator, 2); - ++iterator; - - EXPECT_EQ(iterator, generator.end()); - EXPECT_GE(counter, 3); - }}; - - EXPECT_EQ(**masterIterator, 0); - - { - std::unique_lock guard{counterMutex}; - cv.wait(guard, [&]() { return proceedStage == 1; }); - ++counter; - ++masterIterator; - } - ASSERT_NE(masterIterator, generator.end()); - - EXPECT_EQ(**masterIterator, 1); - { - std::unique_lock guard{counterMutex}; - cv.wait(guard, [&]() { return proceedStage == 2; }); - ++counter; - ++masterIterator; - } - ASSERT_NE(masterIterator, generator.end()); - - EXPECT_EQ(**masterIterator, 2); - { - std::unique_lock guard{counterMutex}; - cv.wait(guard, [&]() { return proceedStage == 3; }); - ++counter; - ++masterIterator; - } - EXPECT_EQ(masterIterator, generator.end()); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyExhaustedMasterCausesFreeForAll) { - CacheableGenerator generator{testGenerator(3)}; - - (void)generator.begin(true); - - auto iterator1 = generator.begin(false); - auto iterator2 = 
generator.begin(false); - - ASSERT_NE(iterator1, generator.end()); - ASSERT_NE(iterator2, generator.end()); - - EXPECT_EQ(**iterator1, 0); - EXPECT_EQ(**iterator2, 0); - - ++iterator1; - ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(**iterator1, 1); - - ++iterator2; - ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(**iterator2, 1); - - ++iterator2; - ASSERT_NE(iterator2, generator.end()); - EXPECT_EQ(**iterator2, 2); - - ++iterator1; - ASSERT_NE(iterator1, generator.end()); - EXPECT_EQ(**iterator1, 2); - - ++iterator1; - EXPECT_EQ(iterator1, generator.end()); - - ++iterator2; - EXPECT_EQ(iterator2, generator.end()); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyOnSizeChangedIsCalledWithCorrectTimingInfo) { - auto timedGenerator = []() -> generator { - while (true) { -#ifndef _QLEVER_NO_TIMING_TESTS - std::this_thread::sleep_for(2ms); -#endif - co_yield 0; - } - }(); - - uint32_t callCounter = 0; - - CacheableGenerator generator{std::move(timedGenerator)}; - - generator.setOnSizeChanged([&](auto duration) { -#ifndef _QLEVER_NO_TIMING_TESTS - using ::testing::AllOf; - using ::testing::Le; - using ::testing::Ge; - EXPECT_THAT(duration, AllOf(Le(3ms), Ge(1ms))); -#endif - ++callCounter; - }); - - { - auto masterIterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(masterIterator, generator.end()); - - ++masterIterator; - - EXPECT_EQ(callCounter, 2); - ASSERT_NE(masterIterator, generator.end()); - } - - { - auto slaveIterator1 = generator.begin(); - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator1, generator.end()); - - auto slaveIterator2 = generator.begin(); - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator2, generator.end()); - - ++slaveIterator2; - - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator2, generator.end()); - - ++slaveIterator2; - - EXPECT_EQ(callCounter, 3); - ASSERT_NE(slaveIterator2, generator.end()); - - ++slaveIterator1; - - 
EXPECT_EQ(callCounter, 3); - ASSERT_NE(slaveIterator1, generator.end()); - - ++slaveIterator1; - - EXPECT_EQ(callCounter, 3); - ASSERT_NE(slaveIterator1, generator.end()); - - ++slaveIterator1; - - EXPECT_EQ(callCounter, 4); - ASSERT_NE(slaveIterator1, generator.end()); - } - - auto slaveIterator3 = generator.begin(); - EXPECT_EQ(callCounter, 4); - ASSERT_NE(slaveIterator3, generator.end()); - - ++slaveIterator3; - - EXPECT_EQ(callCounter, 4); - ASSERT_NE(slaveIterator3, generator.end()); - - ++slaveIterator3; - - EXPECT_EQ(callCounter, 4); - ASSERT_NE(slaveIterator3, generator.end()); - - ++slaveIterator3; - - EXPECT_EQ(callCounter, 4); - ASSERT_NE(slaveIterator3, generator.end()); - - ++slaveIterator3; - - EXPECT_EQ(callCounter, 5); - ASSERT_NE(slaveIterator3, generator.end()); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyOnSizeChangedIsCalledAndRespectsShrink) { - CacheableGenerator generator{testGenerator(3)}; - uint32_t callCounter = 0; - generator.setOnSizeChanged([&](auto) { ++callCounter; }); - - auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(iterator, generator.end()); - - auto slaveIterator1 = generator.begin(); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(**slaveIterator1, 0); - - ++iterator; - EXPECT_EQ(callCounter, 2); - ASSERT_NE(iterator, generator.end()); - - generator.setMaxSize(1); - - ++slaveIterator1; - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(**slaveIterator1, 1); - - auto slaveIterator2 = generator.begin(); - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator2, generator.end()); - EXPECT_EQ(**slaveIterator2, 0); - - ++iterator; - EXPECT_EQ(callCounter, 5); - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(**iterator, 2); - - ++iterator; - EXPECT_EQ(callCounter, 5); - EXPECT_EQ(iterator, generator.end()); - - ++slaveIterator1; - 
ASSERT_NE(slaveIterator1, generator.end()); - EXPECT_EQ(**slaveIterator1, 2); - - EXPECT_THROW(++slaveIterator2, ad_utility::IteratorExpired); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifyShrinkKeepsSingleElement) { - CacheableGenerator generator{testGenerator(3)}; - uint32_t callCounter = 0; - generator.setOnSizeChanged([&](auto) { ++callCounter; }); - - auto iterator = generator.begin(true); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(iterator, generator.end()); - - auto slaveIterator = generator.begin(); - EXPECT_EQ(callCounter, 1); - ASSERT_NE(slaveIterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 2); - ASSERT_NE(iterator, generator.end()); - - generator.setMaxSize(0); - - ++slaveIterator; - EXPECT_EQ(callCounter, 2); - ASSERT_NE(slaveIterator, generator.end()); - - ++iterator; - EXPECT_EQ(callCounter, 5); - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(**iterator, 2); - - ++iterator; - EXPECT_EQ(callCounter, 5); - EXPECT_EQ(iterator, generator.end()); - - ++slaveIterator; - ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(**slaveIterator, 2); -} - -// _____________________________________________________________________________ -TEST(CacheableGenerator, verifySlavesCantBlockMasterIterator) { - CacheableGenerator generator{testGenerator(3)}; - generator.setMaxSize(1); - - auto masterIterator = generator.begin(true); - ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(**masterIterator, 0); - - auto slaveIterator = generator.begin(false); - ASSERT_NE(slaveIterator, generator.end()); - EXPECT_EQ(**slaveIterator, 0); - - ++masterIterator; - ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(**masterIterator, 1); - - ++masterIterator; - ASSERT_NE(masterIterator, generator.end()); - EXPECT_EQ(**masterIterator, 2); - - EXPECT_THROW(**slaveIterator, ad_utility::IteratorExpired); - - ++masterIterator; - EXPECT_EQ(masterIterator, generator.end()); 
+TEST(CacheableGenerator, placeholder) { + auto test = wrapGeneratorWithCache( + testGenerator(10), + [](std::optional& optionalValue, const uint32_t& newValue) { + if (optionalValue.has_value()) { + optionalValue.value() += newValue; + } else { + optionalValue.emplace(newValue); + } + return true; + }, + [](const std::optional&) {}); + EXPECT_EQ(1, 1); } diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index 1ab691d610..8e4c023e1a 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -73,12 +73,16 @@ using SimpleConcurrentLruCache = ad_utility::ConcurrentCache>>; +namespace { +auto returnTrue = [](const auto&) { return true; }; +} // namespace + TEST(ConcurrentCache, sequentialComputation) { SimpleConcurrentLruCache a{3ul}; ad_utility::Timer t{ad_utility::Timer::Started}; // Fake computation that takes 5ms and returns value "3", which is then // stored under key 3. - auto result = a.computeOnce(3, waiting_function("3"s, 5)); + auto result = a.computeOnce(3, waiting_function("3"s, 5), false, returnTrue); ASSERT_EQ("3"s, *result._resultPointer); ASSERT_EQ(result._cacheStatus, ad_utility::CacheStatus::computed); ASSERT_GE(t.msecs(), 5ms); @@ -90,7 +94,7 @@ TEST(ConcurrentCache, sequentialComputation) { t.reset(); t.start(); // takes 0 msecs to compute, as the request is served from the cache. - auto result2 = a.computeOnce(3, waiting_function("3"s, 5)); + auto result2 = a.computeOnce(3, waiting_function("3"s, 5), false, returnTrue); // computing result again: still yields "3", was cached and takes 0 // milliseconds (result is read from cache) ASSERT_EQ("3"s, *result2._resultPointer); @@ -107,7 +111,8 @@ TEST(ConcurrentCache, sequentialPinnedComputation) { ad_utility::Timer t{ad_utility::Timer::Started}; // Fake computation that takes 5ms and returns value "3", which is then // stored under key 3. 
- auto result = a.computeOncePinned(3, waiting_function("3"s, 5)); + auto result = + a.computeOncePinned(3, waiting_function("3"s, 5), false, returnTrue); ASSERT_EQ("3"s, *result._resultPointer); ASSERT_EQ(result._cacheStatus, ad_utility::CacheStatus::computed); ASSERT_GE(t.msecs(), 5ms); @@ -120,7 +125,7 @@ TEST(ConcurrentCache, sequentialPinnedComputation) { t.start(); // takes 0 msecs to compute, as the request is served from the cache. // we don't request a pin, but the original computation was pinned - auto result2 = a.computeOnce(3, waiting_function("3"s, 5)); + auto result2 = a.computeOnce(3, waiting_function("3"s, 5), false, returnTrue); // computing result again: still yields "3", was cached and takes 0 // milliseconds (result is read from cache) ASSERT_EQ("3"s, *result2._resultPointer); @@ -137,7 +142,7 @@ TEST(ConcurrentCache, sequentialPinnedUpgradeComputation) { ad_utility::Timer t{ad_utility::Timer::Started}; // Fake computation that takes 5ms and returns value "3", which is then // stored under key 3. - auto result = a.computeOnce(3, waiting_function("3"s, 5)); + auto result = a.computeOnce(3, waiting_function("3"s, 5), false, returnTrue); ASSERT_EQ("3"s, *result._resultPointer); ASSERT_EQ(result._cacheStatus, ad_utility::CacheStatus::computed); ASSERT_GE(t.msecs(), 5ms); @@ -151,7 +156,8 @@ TEST(ConcurrentCache, sequentialPinnedUpgradeComputation) { // takes 0 msecs to compute, as the request is served from the cache. // request a pin, the result should be read from the cache and upgraded // to a pinned result. 
- auto result2 = a.computeOncePinned(3, waiting_function("3"s, 5)); + auto result2 = + a.computeOncePinned(3, waiting_function("3"s, 5), false, returnTrue); // computing result again: still yields "3", was cached and takes 0 // milliseconds (result is read from cache) ASSERT_EQ("3"s, *result2._resultPointer); @@ -167,7 +173,8 @@ TEST(ConcurrentCache, concurrentComputation) { auto a = SimpleConcurrentLruCache(3ul); StartStopSignal signal; auto compute = [&a, &signal]() { - return a.computeOnce(3, waiting_function("3"s, 5, &signal)); + return a.computeOnce(3, waiting_function("3"s, 5, &signal), false, + returnTrue); }; auto resultFuture = std::async(std::launch::async, compute); signal.hasStartedSignal_.wait(); @@ -195,7 +202,8 @@ TEST(ConcurrentCache, concurrentPinnedComputation) { auto a = SimpleConcurrentLruCache(3ul); StartStopSignal signal; auto compute = [&a, &signal]() { - return a.computeOncePinned(3, waiting_function("3"s, 5, &signal)); + return a.computeOncePinned(3, waiting_function("3"s, 5, &signal), false, + returnTrue); }; auto resultFuture = std::async(std::launch::async, compute); signal.hasStartedSignal_.wait(); @@ -225,7 +233,8 @@ TEST(ConcurrentCache, concurrentPinnedUpgradeComputation) { auto a = SimpleConcurrentLruCache(3ul); StartStopSignal signal; auto compute = [&a, &signal]() { - return a.computeOnce(3, waiting_function("3"s, 5, &signal)); + return a.computeOnce(3, waiting_function("3"s, 5, &signal), false, + returnTrue); }; auto resultFuture = std::async(std::launch::async, compute); signal.hasStartedSignal_.wait(); @@ -240,7 +249,8 @@ TEST(ConcurrentCache, concurrentPinnedUpgradeComputation) { // this call waits for the background task to compute, and then fetches the // result. After this call completes, nothing is in progress and the result // is cached. 
- auto result = a.computeOncePinned(3, waiting_function("3"s, 5)); + auto result = + a.computeOncePinned(3, waiting_function("3"s, 5), false, returnTrue); ASSERT_EQ(0ul, a.numNonPinnedEntries()); ASSERT_EQ(1ul, a.numPinnedEntries()); ASSERT_TRUE(a.getStorage().wlock()->_inProgress.empty()); @@ -255,10 +265,12 @@ TEST(ConcurrentCache, abort) { auto a = SimpleConcurrentLruCache(3ul); StartStopSignal signal; auto compute = [&a, &signal]() { - return a.computeOnce(3, waiting_function("3"s, 5, &signal)); + return a.computeOnce(3, waiting_function("3"s, 5, &signal), false, + returnTrue); }; auto computeWithError = [&a, &signal]() { - return a.computeOnce(3, wait_and_throw_function(5, &signal)); + return a.computeOnce(3, wait_and_throw_function(5, &signal), false, + returnTrue); }; auto fut = std::async(std::launch::async, computeWithError); signal.hasStartedSignal_.wait(); @@ -279,10 +291,12 @@ TEST(ConcurrentCache, abortPinned) { auto a = SimpleConcurrentLruCache(3ul); StartStopSignal signal; auto compute = [&]() { - return a.computeOncePinned(3, waiting_function("3"s, 5, &signal)); + return a.computeOncePinned(3, waiting_function("3"s, 5, &signal), false, + returnTrue); }; auto computeWithError = [&a, &signal]() { - return a.computeOncePinned(3, wait_and_throw_function(5, &signal)); + return a.computeOncePinned(3, wait_and_throw_function(5, &signal), false, + returnTrue); }; auto fut = std::async(std::launch::async, computeWithError); signal.hasStartedSignal_.wait(); diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 70ab494589..06b5ebff89 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1076,10 +1076,9 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { idTable.push_back({Id::makeFromInt(42)}); idTable.push_back({Id::makeFromInt(1337)}); - Result result = Result::createResultWithFullyEvaluatedIdTable( - std::make_shared( - 
ProtoResult{std::move(idTable), {}, LocalVocab{}}, - std::numeric_limits::max())); + Result result = Result::fromProtoResult( + ProtoResult{std::move(idTable), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1109,11 +1108,9 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { co_yield std::move(idTable2); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1144,11 +1141,9 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { co_yield std::move(idTable1); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1178,11 +1173,9 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, 
result); @@ -1216,11 +1209,9 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1258,11 +1249,9 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1308,11 +1297,9 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable3); }(); - Result result = Result::createResultAsMasterConsumer( - std::make_shared( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - std::numeric_limits::max()), - []() {}); + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); diff --git a/test/IteratorWrapperTest.cpp b/test/IteratorWrapperTest.cpp deleted file mode 100644 index 1637102eb9..0000000000 --- a/test/IteratorWrapperTest.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Robin Textor-Falconi - -#include - -#include - -#include "util/IteratorWrapper.h" - -using ad_utility::IteratorWrapper; - -TEST(IteratorWrapper, transparentWrapper) { - std::vector vec{1, 2, 3}; - int numIterations = 0; - for (auto value : IteratorWrapper{vec}) { - EXPECT_EQ(value, numIterations + 1); - numIterations++; - } - EXPECT_EQ(numIterations, 3); -} - -// _____________________________________________________________________________ - -struct TestIterable { - std::vector vec_{1, 2, 3}; - bool value1_ = false; - int value2_ = 0; - std::string value3_ = ""; - - auto begin(bool value1, int value2, std::string value3) { - value1_ = value1; - value2_ = value2; - value3_ = std::move(value3); - return vec_.begin(); - } - - auto end() { return vec_.end(); } -}; - -TEST(IteratorWrapper, verifyArgumentsArePassed) { - TestIterable testIterable; - int numIterations = 0; - for (auto value : IteratorWrapper{testIterable, true, 42, "Hi"}) { - EXPECT_EQ(value, numIterations + 1); - numIterations++; - } - EXPECT_EQ(numIterations, 3); - EXPECT_TRUE(testIterable.value1_); - EXPECT_EQ(testIterable.value2_, 42); - EXPECT_EQ(testIterable.value3_, "Hi"); -} diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index cd634b0cdf..d7b3602ab4 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -16,11 +16,10 @@ using enum PositionInTriple; namespace { struct ContextWrapper { Index _index{ad_utility::makeUnlimitedAllocator()}; - Result _resultTable{Result::createResultWithFullyEvaluatedIdTable( - std::make_shared( - ProtoResult{ - IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}, - std::numeric_limits::max()))}; + Result _resultTable = Result::fromProtoResult( + ProtoResult{ + IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); // TODO `VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -30,11 +29,9 @@ struct ContextWrapper { } void 
setIdTable(IdTable&& table) { - _resultTable = Result::createResultWithFullyEvaluatedIdTable( - std::make_shared( - ProtoResult{ - std::move(table), {}, _resultTable.getSharedLocalVocab()}, - std::numeric_limits::max())); + _resultTable = Result::fromProtoResult( + ProtoResult{std::move(table), {}, _resultTable.getSharedLocalVocab()}, + [](const auto&) { return false; }, [](auto) {}); } }; From 4b93109b72c759b539a163d960c152fa9fd44a58 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:10:00 +0200 Subject: [PATCH 092/133] Make diff smaller and add some more tests --- src/engine/Result.cpp | 11 +++--- src/util/CacheableGenerator.h | 6 ++-- src/util/ConcurrentCache.h | 1 + test/CacheTest.cpp | 1 + test/CacheableGeneratorTest.cpp | 62 +++++++++++++++++++++++++++++---- test/OperationTest.cpp | 9 ++--- 6 files changed, 68 insertions(+), 22 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index f530944f67..62de8a5db8 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -247,13 +247,10 @@ Result Result::fromProtoResult(ProtoResult protoResult, }, [storeInCache = std::move(storeInCache), sortedByCopy = std::move(sortedByCopy), - localVocabReference = std::move(localVocabReference)]( - std::optional idTable) mutable { - if (idTable.has_value()) { - storeInCache(Result{std::move(idTable).value(), - std::move(sortedByCopy), - std::move(localVocabReference)}); - } + localVocabReference = + std::move(localVocabReference)](IdTable idTable) mutable { + storeInCache(Result{std::move(idTable), std::move(sortedByCopy), + std::move(localVocabReference)}); }), std::move(protoResult.storage_.sortedBy_), std::move(protoResult.storage_.localVocab_)}; diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 32f456a727..9111dafbf5 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -16,7 +16,7 @@ cppcoro::generator wrapGeneratorWithCache( 
cppcoro::generator generator, InvocableWithExactReturnType&, const T&> auto aggregator, - InvocableWithExactReturnType> auto onFullyCached) { + InvocableWithExactReturnType auto onFullyCached) { std::optional aggregatedData{}; bool aggregate = true; for (auto&& element : generator) { @@ -28,8 +28,8 @@ cppcoro::generator wrapGeneratorWithCache( } co_yield AD_FWD(element); } - if (aggregate) { - onFullyCached(std::move(aggregatedData)); + if (aggregatedData.has_value()) { + onFullyCached(std::move(aggregatedData).value()); } } }; // namespace ad_utility diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index bc61265c2d..b9e9e51799 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -332,6 +332,7 @@ class ConcurrentCache { } private: + // implementation for computeOnce (pinned and normal variant). ResultAndCacheStatus computeOnceImpl( bool pinned, const Key& key, const InvocableWithConvertibleReturnType auto& computeFunction, diff --git a/test/CacheTest.cpp b/test/CacheTest.cpp index de6491481c..885c652ed6 100644 --- a/test/CacheTest.cpp +++ b/test/CacheTest.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "util/Cache.h" #include "util/DefaultValueSizeGetter.h" diff --git a/test/CacheableGeneratorTest.cpp b/test/CacheableGeneratorTest.cpp index dc07ecfe60..a96a064429 100644 --- a/test/CacheableGeneratorTest.cpp +++ b/test/CacheableGeneratorTest.cpp @@ -9,7 +9,7 @@ using ad_utility::wrapGeneratorWithCache; using cppcoro::generator; -using namespace std::chrono_literals; +using ::testing::Optional; generator testGenerator(uint32_t range) { for (uint32_t i = 0; i < range; i++) { @@ -17,9 +17,11 @@ generator testGenerator(uint32_t range) { } } -TEST(CacheableGenerator, placeholder) { - auto test = wrapGeneratorWithCache( - testGenerator(10), +// _____________________________________________________________________________ +TEST(CacheableGenerator, testAggregation) { + bool called = false; + auto gen = 
wrapGeneratorWithCache( + testGenerator(4), [](std::optional& optionalValue, const uint32_t& newValue) { if (optionalValue.has_value()) { optionalValue.value() += newValue; @@ -28,6 +30,54 @@ TEST(CacheableGenerator, placeholder) { } return true; }, - [](const std::optional&) {}); - EXPECT_EQ(1, 1); + [&called](std::optional value) { + called = true; + EXPECT_THAT(value, Optional(6)); + }); + uint32_t counter = 0; + for (uint32_t element : gen) { + EXPECT_EQ(counter, element); + ++counter; + } + EXPECT_EQ(counter, 4); + EXPECT_TRUE(called); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, testEmptyGenerator) { + bool called = false; + auto gen = wrapGeneratorWithCache( + testGenerator(0), + [&called](std::optional&, uint32_t) { + called = true; + return true; + }, + [&called](std::optional) { called = true; }); + uint32_t tracker = 0; + for (uint32_t element : gen) { + tracker += element; + } + EXPECT_EQ(tracker, 0); + EXPECT_FALSE(called); +} + +// _____________________________________________________________________________ +TEST(CacheableGenerator, testAggregationCutoff) { + uint32_t callCounter = 0; + bool called = false; + auto gen = wrapGeneratorWithCache( + testGenerator(2), + [&callCounter](std::optional&, uint32_t) { + ++callCounter; + return false; + }, + [&called](std::optional) { called = true; }); + uint32_t loopCounter = 0; + for (uint32_t element : gen) { + EXPECT_EQ(element, loopCounter); + ++loopCounter; + } + EXPECT_EQ(loopCounter, 2); + EXPECT_EQ(callCounter, 1); + EXPECT_FALSE(called); } diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 7f3c5c758a..9e9d47571a 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -58,8 +58,7 @@ TEST(OperationTest, getResultOnlyCached) { // When we now request to only return the result if it is cached, we should // get exactly the same `shared_ptr` as with the previous call. 
NeutralElementOperation n3{qec}; - EXPECT_EQ(&n3.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), - &result->idTable()); + EXPECT_EQ(n3.getResult(true, ComputationMode::ONLY_IF_CACHED), result); EXPECT_EQ(n3.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedNotPinned); @@ -68,8 +67,7 @@ TEST(OperationTest, getResultOnlyCached) { QueryExecutionContext qecCopy{*qec}; qecCopy._pinResult = true; NeutralElementOperation n4{&qecCopy}; - EXPECT_EQ(&n4.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), - &result->idTable()); + EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); // The cache status is `cachedNotPinned` because we found the element cached // but not pinned (it does reflect the status BEFORE the operation). @@ -81,8 +79,7 @@ TEST(OperationTest, getResultOnlyCached) { // We have pinned the result, so requesting it again should return a pinned // result. qecCopy._pinResult = false; - EXPECT_EQ(&n4.getResult(true, ComputationMode::ONLY_IF_CACHED)->idTable(), - &result->idTable()); + EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); EXPECT_EQ(n4.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::cachedPinned); From 7294b10259036381ffc41e5422ab50916a4dd053 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:43:47 +0200 Subject: [PATCH 093/133] Fix problems with `RuntimeInformation` --- src/engine/ExportQueryExecutionTrees.cpp | 94 ++++++++++++++---------- src/engine/ExportQueryExecutionTrees.h | 19 +++-- src/engine/Operation.cpp | 51 +++++++------ src/engine/Operation.h | 4 +- src/engine/Result.cpp | 22 ++++++ src/engine/Result.h | 3 + test/ExportQueryExecutionTreesTest.cpp | 57 ++++++++++++-- 7 files changed, 174 insertions(+), 76 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 31f0763173..a12331c057 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ 
b/src/engine/ExportQueryExecutionTrees.cpp @@ -15,12 +15,15 @@ // __________________________________________________________________________ cppcoro::generator ExportQueryExecutionTrees::getIdTables( - const Result& result) { + const Result& result, std::chrono::milliseconds& totalTime) { if (result.isDataEvaluated()) { co_yield result.idTable(); } else { + ad_utility::Timer timer{ad_utility::Timer::Started}; for (const IdTable& idTable : result.idTables()) { + totalTime += timer.msecs(); co_yield idTable; + timer.start(); } } } @@ -30,8 +33,9 @@ cppcoro::generator ExportQueryExecutionTrees::getIdTables( // LIMIT, the OFFSET, and the actual size of the `idTable` cppcoro::generator ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, - const Result& result) { - for (const IdTable& idTable : getIdTables(result)) { + const Result& result, + std::chrono::milliseconds& totalTime) { + for (const IdTable& idTable : getIdTables(result, totalTime)) { uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); for (size_t index = currentOffset; index < upperBound; index++) { @@ -51,8 +55,9 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, - CancellationHandle cancellationHandle) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { + CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result, totalTime)) { ConstructQueryExportContext context{i, idTable, result->localVocab(), qet.getVariableColumns(), qet.getQec()->getIndex()}; @@ -81,9 +86,10 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { 
result->logResultSize(); - auto generator = - constructQueryResultToTriples(qet, constructTriples, limitAndOffset, - result, std::move(cancellationHandle)); + std::chrono::milliseconds placeholder; + auto generator = constructQueryResultToTriples( + qet, constructTriples, limitAndOffset, result, + std::move(cancellationHandle), placeholder); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -113,10 +119,11 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle) { - auto generator = constructQueryResultToTriples(qet, constructTriples, - limitAndOffset, std::move(res), - std::move(cancellationHandle)); + CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime) { + auto generator = constructQueryResultToTriples( + qet, constructTriples, limitAndOffset, std::move(res), + std::move(cancellationHandle), totalTime); std::vector> jsonArray; for (auto& triple : generator) { jsonArray.push_back({std::move(triple.subject_), @@ -130,12 +137,13 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr result, - CancellationHandle cancellationHandle) { + std::shared_ptr result, CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime) { AD_CORRECTNESS_CHECK(result != nullptr); nlohmann::json json = nlohmann::json::array(); - for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { + for (auto [rowIndex, idTable] : + getRowIndices(limitAndOffset, *result, totalTime)) { // We need the explicit `array` constructor for the special case of zero // variables. 
json.push_back(nlohmann::json::array()); @@ -375,7 +383,9 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { + std::chrono::milliseconds placeholder; + for (auto [rowIndex, idTable] : + getRowIndices(limitAndOffset, *result, placeholder)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. nlohmann::ordered_json binding; @@ -411,8 +421,8 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - std::shared_ptr result, - CancellationHandle cancellationHandle) { + std::shared_ptr result, CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime) { AD_CORRECTNESS_CHECK(result != nullptr); LOG(DEBUG) << "Resolving strings for finished binary result...\n"; QueryExecutionTree::ColumnIndicesAndTypes selectedColumnIndices = @@ -420,7 +430,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( return idTableToQLeverJSONArray(qet, limitAndOffset, selectedColumnIndices, std::move(result), - std::move(cancellationHandle)); + std::move(cancellationHandle), totalTime); } using parsedQuery::SelectClause; @@ -450,7 +460,9 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { + std::chrono::milliseconds placeholder; + for (auto [i, idTable] : + getRowIndices(limitAndOffset, *result, placeholder)) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -478,7 +490,9 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == 
MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { + std::chrono::milliseconds placeholder; + for (auto [i, idTable] : + getRowIndices(limitAndOffset, *result, placeholder)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -600,8 +614,10 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: result->logResultSize(); auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); + std::chrono::milliseconds placeholder; // TODO we could prefilter for the nonexisting variables. - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { + for (auto [i, idTable] : + getRowIndices(limitAndOffset, *result, placeholder)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -640,9 +656,10 @@ ExportQueryExecutionTrees::constructQueryResultToStream( ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? '\t' : ','; - auto generator = - constructQueryResultToTriples(qet, constructTriples, limitAndOffset, - result, std::move(cancellationHandle)); + std::chrono::milliseconds placeholder; + auto generator = constructQueryResultToTriples( + qet, constructTriples, limitAndOffset, result, + std::move(cancellationHandle), placeholder); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -661,8 +678,10 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( std::shared_ptr result = qet.getResult(query._limitOffset._limit.has_value()); result->logResultSize(); - // TODO this timer only makes sense for non lazy results. 
- auto timeResultComputation = requestTimer.msecs(); + using namespace std::chrono_literals; + // This timer only makes sense for non lazy results. + auto timeResultComputation = + result->isDataEvaluated() ? requestTimer.msecs() : 0ms; std::optional resultSize = query.hasSelectClause() && result->isDataEvaluated() @@ -683,22 +702,21 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["runtimeInformation"]["meta"] = nlohmann::ordered_json( qet.getRootOperation()->getRuntimeInfoWholeQuery()); + + j["res"] = query.hasSelectClause() + ? selectQueryResultBindingsToQLeverJSON( + qet, query.selectClause(), query._limitOffset, + std::move(result), std::move(cancellationHandle), + timeResultComputation) + : constructQueryResultBindingsToQLeverJSON( + qet, query.constructClause().triples_, + query._limitOffset, std::move(result), + std::move(cancellationHandle), timeResultComputation); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); runtimeInformation.addLimitOffsetRow( query._limitOffset, std::chrono::milliseconds::zero(), false); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); - - { - j["res"] = - query.hasSelectClause() - ? 
selectQueryResultBindingsToQLeverJSON( - qet, query.selectClause(), query._limitOffset, - std::move(result), std::move(cancellationHandle)) - : constructQueryResultBindingsToQLeverJSON( - qet, query.constructClause().triples_, query._limitOffset, - std::move(result), std::move(cancellationHandle)); - } j["resultsize"] = resultSize.value_or(j["res"].size()); j["time"]["total"] = std::to_string(requestTimer.msecs().count()) + "ms"; j["time"]["computeResult"] = diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index a85e35e546..e34705e460 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -117,7 +117,8 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, - CancellationHandle cancellationHandle); + CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime); /** * @brief Convert an `IdTable` (typically from a query result) to a JSON array @@ -137,14 +138,16 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, std::shared_ptr resultTable, - CancellationHandle cancellationHandle); + CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime); // ___________________________________________________________________________ static nlohmann::json constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, CancellationHandle cancellationHandle); + std::shared_ptr res, CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime); // Generate an RDF graph for a CONSTRUCT query. 
static cppcoro::generator @@ -152,7 +155,8 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle); + CancellationHandle cancellationHandle, + std::chrono::milliseconds& totalTime); // ___________________________________________________________________________ static nlohmann::json selectQueryResultToSparqlJSON( @@ -183,15 +187,18 @@ class ExportQueryExecutionTrees { const IdTable& idTable_; }; - static cppcoro::generator getIdTables(const Result& result); + static cppcoro::generator getIdTables( + const Result& result, std::chrono::milliseconds& totalTime); // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. It takes into // account the LIMIT, the OFFSET, and the actual size of the `idTable` static cppcoro::generator getRowIndices( - LimitOffsetClause limitOffset, const Result& result); + LimitOffsetClause limitOffset, const Result& result, + std::chrono::milliseconds& totalTime); FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator); FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator); + FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesTimingInfoIsCorrect); FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable); FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 61d9660e15..e8912e3cd3 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -71,7 +71,8 @@ void Operation::recursivelySetTimeConstraint( // _____________________________________________________________________________ ProtoResult Operation::runComputation(ad_utility::Timer& timer, - ComputationMode computationMode) { + ComputationMode computationMode, + bool isRoot) { 
checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); @@ -100,9 +101,20 @@ ProtoResult Operation::runComputation(ad_utility::Timer& timer, ad_utility::CacheStatus::computed, timer.msecs(), std::nullopt); } else { - // TODO check if this is sufficient here or we need more of - // `updateRuntimeInformationOnSuccess` functionality here. runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; + result.runOnNewChunkComputed([this, isRoot]( + const IdTable& idTable, + std::chrono::milliseconds duration) { + runtimeInfo().totalTime_ += duration; + runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); + runtimeInfo().numRows_ = idTable.numRows(); + runtimeInfo().numCols_ = idTable.numColumns(); + LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() + << " x " << idTable.numColumns() << std::endl; + if (isRoot) { + signalQueryUpdate(); + } + }); } // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not // already perform it. 
An example for an operation that directly computes @@ -127,25 +139,19 @@ ProtoResult Operation::runComputation(ad_utility::Timer& timer, // _____________________________________________________________________________ CacheValue Operation::runComputationAndTransformToCache( ad_utility::Timer& timer, ComputationMode computationMode, - const std::string& cacheKey, bool pinned) { + const std::string& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); auto result = Result::fromProtoResult( - runComputation(timer, computationMode), + runComputation(timer, computationMode, isRoot), [&cache](const IdTable& idTable) { return cache.getMaxSizeSingleEntry() >= CacheValue::getSize(idTable); }, - [this, &cache, cacheKey, pinned](Result aggregatedResult) { + [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey, + pinned](Result aggregatedResult) { cache.tryInsertIfNotPresent( pinned, cacheKey, - CacheValue{std::move(aggregatedResult), runtimeInfo()}); + CacheValue{std::move(aggregatedResult), *runtimeInfo}); }); - /* - TODO incorporate time calculations and query updates. - runtimeInfo().totalTime_ += duration; - if (isRoot) { - signalQueryUpdate(); - } - */ if (result.isDataEvaluated()) { auto resultNumRows = result.idTable().size(); auto resultNumCols = result.idTable().numColumns(); @@ -188,9 +194,10 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult]() { + auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult, + isRoot]() { return runComputationAndTransformToCache(timer, computationMode, cacheKey, - pinResult); + pinResult, isRoot); }; auto suitedForCache = [](const CacheValue& cacheValue) { @@ -210,10 +217,9 @@ std::shared_ptr Operation::getResult( return nullptr; } - updateRuntimeInformationOnSuccess( - result, result._resultPointer->resultTable().isDataEvaluated() - ? 
timer.msecs() - : result._resultPointer->runtimeInfo().totalTime_); + if (result._resultPointer->resultTable().isDataEvaluated()) { + updateRuntimeInformationOnSuccess(result, timer.msecs()); + } return result._resultPointer->resultTablePtr(); } catch (ad_utility::CancellationException& e) { @@ -303,10 +309,9 @@ void Operation::updateRuntimeInformationOnSuccess( const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { const auto& result = resultAndCacheStatus._resultPointer->resultTable(); + AD_CONTRACT_CHECK(result.isDataEvaluated()); updateRuntimeInformationOnSuccess( - // TODO find a better representation for "unknown" than 0. - result.isDataEvaluated() ? result.idTable().size() : 0, - resultAndCacheStatus._cacheStatus, duration, + result.idTable().size(), resultAndCacheStatus._cacheStatus, duration, resultAndCacheStatus._resultPointer->runtimeInfo()); } diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 2eb70d647e..07a67d5056 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -261,12 +261,12 @@ class Operation { virtual ProtoResult computeResult(bool requestLaziness) = 0; ProtoResult runComputation(ad_utility::Timer& timer, - ComputationMode computationMode); + ComputationMode computationMode, bool isRoot); CacheValue runComputationAndTransformToCache(ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, - bool pinned); + bool pinned, bool isRoot); // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. 
diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 62de8a5db8..c2e3be50e1 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -168,6 +168,24 @@ void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { } } +// _____________________________________________________________________________ +void ProtoResult::runOnNewChunkComputed( + std::function function) { + AD_CONTRACT_CHECK(!storage_.isDataEvaluated()); + auto generator = + [](cppcoro::generator original, + std::function + function) -> cppcoro::generator { + ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; + for (auto&& idTable : original) { + function(idTable, timer.msecs()); + co_yield std::forward(idTable); + timer.start(); + } + }(std::move(storage_.idTables()), std::move(function)); + storage_.idTables() = std::move(generator); +} + // _____________________________________________________________________________ auto ProtoResult::computeDatatypeCountsPerColumn(IdTable& idTable) -> DatatypeCountsPerColumn { @@ -243,6 +261,10 @@ Result Result::fromProtoResult(ProtoResult protoResult, } else { aggregate.emplace(newTable.clone()); } + // TODO Review question: Should we compute the sizes + // individually and add the result together to then check the size + // at the cost of a more complex/less generic interface to avoid + // filling up memory that might be deallocated soon after. return fitsInCache(aggregate.value()); }, [storeInCache = std::move(storeInCache), diff --git a/src/engine/Result.h b/src/engine/Result.h index a7cf837320..10599e09d7 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -152,6 +152,9 @@ class ProtoResult { // check is successful. void checkDefinedness(const VariableToColumnMap& varColMap); + void runOnNewChunkComputed( + std::function function); + private: // Get the information, which columns stores how many entries of each // datatype. 
diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 06b5ebff89..5fa5b9b376 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -13,6 +13,7 @@ #include "util/IndexTestHelpers.h" using namespace std::string_literals; +using namespace std::chrono_literals; using ::testing::HasSubstr; // Run the given SPARQL `query` on the given Turtle `kg` and export the result @@ -1079,7 +1080,9 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { Result result = Result::fromProtoResult( ProtoResult{std::move(idTable), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - auto generator = ExportQueryExecutionTrees::getIdTables(result); + + std::chrono::milliseconds time = 0ms; + auto generator = ExportQueryExecutionTrees::getIdTables(result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1089,6 +1092,8 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { ++iterator; EXPECT_EQ(iterator, generator.end()); + + EXPECT_EQ(time, 0ms); } // _____________________________________________________________________________ @@ -1111,7 +1116,8 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - auto generator = ExportQueryExecutionTrees::getIdTables(result); + std::chrono::milliseconds time; + auto generator = ExportQueryExecutionTrees::getIdTables(result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1130,6 +1136,38 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { EXPECT_EQ(iterator, generator.end()); } +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, getIdTablesTimingInfoIsCorrect) { + auto tableGenerator = []() -> 
cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + + std::this_thread::sleep_for(1ms); + + co_yield std::move(idTable1); + }(); + + Result result = Result::fromProtoResult( + ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, + [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time = 0ms; + auto generator = ExportQueryExecutionTrees::getIdTables(result, time); + + auto iterator = generator.begin(); + ASSERT_NE(iterator, generator.end()); + ASSERT_EQ(iterator->size(), 1); + EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(1)); + + ++iterator; + EXPECT_EQ(iterator, generator.end()); + +#ifdef _QLEVER_NO_TIMING_TESTS + EXPECT_GE(time, 1ms); +#else + EXPECT_EQ(time, 1ms); +#endif +} + // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { auto tableGenerator = []() -> cppcoro::generator { @@ -1144,8 +1182,9 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 1, ._offset = 1}, result); + LimitOffsetClause{._limit = 1, ._offset = 1}, result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1176,8 +1215,9 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); + LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result, time); auto iterator = generator.begin(); 
ASSERT_NE(iterator, generator.end()); @@ -1212,8 +1252,9 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 3}, result); + LimitOffsetClause{._limit = 3}, result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1252,8 +1293,9 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 3, ._offset = 1}, result); + LimitOffsetClause{._limit = 3, ._offset = 1}, result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1300,8 +1342,9 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); + std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 5, ._offset = 2}, result); + LimitOffsetClause{._limit = 5, ._offset = 2}, result, time); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); From 3b9fee80848091e78b9bb7e4ee3ffa866bcf23c5 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:27:58 +0200 Subject: [PATCH 094/133] Simplify timing calculation --- src/engine/ExportQueryExecutionTrees.cpp | 105 ++++++++++------------- src/engine/ExportQueryExecutionTrees.h | 19 ++-- test/ExportQueryExecutionTreesTest.cpp | 57 ++---------- 3 files changed, 56 insertions(+), 125 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp 
b/src/engine/ExportQueryExecutionTrees.cpp index a12331c057..d634b7f7b0 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -15,15 +15,12 @@ // __________________________________________________________________________ cppcoro::generator ExportQueryExecutionTrees::getIdTables( - const Result& result, std::chrono::milliseconds& totalTime) { + const Result& result) { if (result.isDataEvaluated()) { co_yield result.idTable(); } else { - ad_utility::Timer timer{ad_utility::Timer::Started}; for (const IdTable& idTable : result.idTables()) { - totalTime += timer.msecs(); co_yield idTable; - timer.start(); } } } @@ -33,9 +30,8 @@ cppcoro::generator ExportQueryExecutionTrees::getIdTables( // LIMIT, the OFFSET, and the actual size of the `idTable` cppcoro::generator ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, - const Result& result, - std::chrono::milliseconds& totalTime) { - for (const IdTable& idTable : getIdTables(result, totalTime)) { + const Result& result) { + for (const IdTable& idTable : getIdTables(result)) { uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); for (size_t index = currentOffset; index < upperBound; index++) { @@ -55,9 +51,8 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, - CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result, totalTime)) { + CancellationHandle cancellationHandle) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { ConstructQueryExportContext context{i, idTable, result->localVocab(), qet.getVariableColumns(), qet.getQec()->getIndex()}; @@ -86,10 +81,9 @@ ad_utility::streams::stream_generator 
ExportQueryExecutionTrees:: LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { result->logResultSize(); - std::chrono::milliseconds placeholder; - auto generator = constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, result, - std::move(cancellationHandle), placeholder); + auto generator = + constructQueryResultToTriples(qet, constructTriples, limitAndOffset, + result, std::move(cancellationHandle)); for (const auto& triple : generator) { co_yield triple.subject_; co_yield ' '; @@ -119,11 +113,10 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime) { - auto generator = constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, std::move(res), - std::move(cancellationHandle), totalTime); + CancellationHandle cancellationHandle) { + auto generator = constructQueryResultToTriples(qet, constructTriples, + limitAndOffset, std::move(res), + std::move(cancellationHandle)); std::vector> jsonArray; for (auto& triple : generator) { jsonArray.push_back({std::move(triple.subject_), @@ -137,13 +130,12 @@ ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON( nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, - std::shared_ptr result, CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime) { + std::shared_ptr result, + CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(result != nullptr); nlohmann::json json = nlohmann::json::array(); - for (auto [rowIndex, idTable] : - getRowIndices(limitAndOffset, *result, totalTime)) { + for (auto [rowIndex, idTable] : 
getRowIndices(limitAndOffset, *result)) { // We need the explicit `array` constructor for the special case of zero // variables. json.push_back(nlohmann::json::array()); @@ -383,9 +375,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - std::chrono::milliseconds placeholder; - for (auto [rowIndex, idTable] : - getRowIndices(limitAndOffset, *result, placeholder)) { + for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. nlohmann::ordered_json binding; @@ -421,8 +411,8 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, - std::shared_ptr result, CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime) { + std::shared_ptr result, + CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(result != nullptr); LOG(DEBUG) << "Resolving strings for finished binary result...\n"; QueryExecutionTree::ColumnIndicesAndTypes selectedColumnIndices = @@ -430,7 +420,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( return idTableToQLeverJSONArray(qet, limitAndOffset, selectedColumnIndices, std::move(result), - std::move(cancellationHandle), totalTime); + std::move(cancellationHandle)); } using parsedQuery::SelectClause; @@ -460,9 +450,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - std::chrono::milliseconds placeholder; - for (auto [i, idTable] : - getRowIndices(limitAndOffset, *result, placeholder)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( 
@@ -490,9 +478,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - std::chrono::milliseconds placeholder; - for (auto [i, idTable] : - getRowIndices(limitAndOffset, *result, placeholder)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -614,10 +600,8 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: result->logResultSize(); auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); - std::chrono::milliseconds placeholder; // TODO we could prefilter for the nonexisting variables. - for (auto [i, idTable] : - getRowIndices(limitAndOffset, *result, placeholder)) { + for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -656,10 +640,9 @@ ExportQueryExecutionTrees::constructQueryResultToStream( ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; constexpr char sep = format == MediaType::tsv ? 
'\t' : ','; - std::chrono::milliseconds placeholder; - auto generator = constructQueryResultToTriples( - qet, constructTriples, limitAndOffset, result, - std::move(cancellationHandle), placeholder); + auto generator = + constructQueryResultToTriples(qet, constructTriples, limitAndOffset, + result, std::move(cancellationHandle)); for (auto& triple : generator) { co_yield escapeFunction(std::move(triple.subject_)); co_yield sep; @@ -675,13 +658,10 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ParsedQuery& query, const QueryExecutionTree& qet, const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { + auto timeUntilFunctionCall = requestTimer.msecs(); std::shared_ptr result = qet.getResult(query._limitOffset._limit.has_value()); result->logResultSize(); - using namespace std::chrono_literals; - // This timer only makes sense for non lazy results. - auto timeResultComputation = - result->isDataEvaluated() ? requestTimer.msecs() : 0ms; std::optional resultSize = query.hasSelectClause() && result->isDataEvaluated() @@ -693,30 +673,31 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["query"] = query._originalString; j["status"] = "OK"; j["warnings"] = qet.collectWarnings(); - if (query.hasSelectClause()) { - j["selected"] = query.selectClause().getSelectedVariablesAsStrings(); - } else { - j["selected"] = - std::vector{"?subject", "?predicate", "?object"}; - } + j["selected"] = + query.hasSelectClause() + ? query.selectClause().getSelectedVariablesAsStrings() + : std::vector{"?subject", "?predicate", "?object"}; + + j["res"] = + query.hasSelectClause() + ? 
selectQueryResultBindingsToQLeverJSON( + qet, query.selectClause(), query._limitOffset, + std::move(result), std::move(cancellationHandle)) + : constructQueryResultBindingsToQLeverJSON( + qet, query.constructClause().triples_, query._limitOffset, + std::move(result), std::move(cancellationHandle)); j["runtimeInformation"]["meta"] = nlohmann::ordered_json( qet.getRootOperation()->getRuntimeInfoWholeQuery()); - - j["res"] = query.hasSelectClause() - ? selectQueryResultBindingsToQLeverJSON( - qet, query.selectClause(), query._limitOffset, - std::move(result), std::move(cancellationHandle), - timeResultComputation) - : constructQueryResultBindingsToQLeverJSON( - qet, query.constructClause().triples_, - query._limitOffset, std::move(result), - std::move(cancellationHandle), timeResultComputation); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); runtimeInformation.addLimitOffsetRow( query._limitOffset, std::chrono::milliseconds::zero(), false); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); + + auto timeResultComputation = + timeUntilFunctionCall + runtimeInformation.totalTime_; + j["resultsize"] = resultSize.value_or(j["res"].size()); j["time"]["total"] = std::to_string(requestTimer.msecs().count()) + "ms"; j["time"]["computeResult"] = diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index e34705e460..a85e35e546 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -117,8 +117,7 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, const LimitOffsetClause& limitAndOffset, std::shared_ptr resultTable, - CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime); + CancellationHandle cancellationHandle); /** * @brief Convert an `IdTable` (typically from a query result) to a JSON array @@ -138,16 +137,14 @@ class ExportQueryExecutionTrees { const 
QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset, const QueryExecutionTree::ColumnIndicesAndTypes& columns, std::shared_ptr resultTable, - CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime); + CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json constructQueryResultBindingsToQLeverJSON( const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, const LimitOffsetClause& limitAndOffset, - std::shared_ptr res, CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime); + std::shared_ptr res, CancellationHandle cancellationHandle); // Generate an RDF graph for a CONSTRUCT query. static cppcoro::generator @@ -155,8 +152,7 @@ class ExportQueryExecutionTrees { const QueryExecutionTree& qet, const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr res, - CancellationHandle cancellationHandle, - std::chrono::milliseconds& totalTime); + CancellationHandle cancellationHandle); // ___________________________________________________________________________ static nlohmann::json selectQueryResultToSparqlJSON( @@ -187,18 +183,15 @@ class ExportQueryExecutionTrees { const IdTable& idTable_; }; - static cppcoro::generator getIdTables( - const Result& result, std::chrono::milliseconds& totalTime); + static cppcoro::generator getIdTables(const Result& result); // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. 
It takes into // account the LIMIT, the OFFSET, and the actual size of the `idTable` static cppcoro::generator getRowIndices( - LimitOffsetClause limitOffset, const Result& result, - std::chrono::milliseconds& totalTime); + LimitOffsetClause limitOffset, const Result& result); FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator); FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator); - FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesTimingInfoIsCorrect); FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable); FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped); diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 5fa5b9b376..06b5ebff89 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -13,7 +13,6 @@ #include "util/IndexTestHelpers.h" using namespace std::string_literals; -using namespace std::chrono_literals; using ::testing::HasSubstr; // Run the given SPARQL `query` on the given Turtle `kg` and export the result @@ -1080,9 +1079,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { Result result = Result::fromProtoResult( ProtoResult{std::move(idTable), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - - std::chrono::milliseconds time = 0ms; - auto generator = ExportQueryExecutionTrees::getIdTables(result, time); + auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1092,8 +1089,6 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { ++iterator; EXPECT_EQ(iterator, generator.end()); - - EXPECT_EQ(time, 0ms); } // _____________________________________________________________________________ @@ -1116,8 +1111,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { Result result = Result::fromProtoResult( 
ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; - auto generator = ExportQueryExecutionTrees::getIdTables(result, time); + auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1136,38 +1130,6 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { EXPECT_EQ(iterator, generator.end()); } -// _____________________________________________________________________________ -TEST(ExportQueryExecutionTrees, getIdTablesTimingInfoIsCorrect) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - - std::this_thread::sleep_for(1ms); - - co_yield std::move(idTable1); - }(); - - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time = 0ms; - auto generator = ExportQueryExecutionTrees::getIdTables(result, time); - - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - ASSERT_EQ(iterator->size(), 1); - EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(1)); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); - -#ifdef _QLEVER_NO_TIMING_TESTS - EXPECT_GE(time, 1ms); -#else - EXPECT_EQ(time, 1ms); -#endif -} - // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { auto tableGenerator = []() -> cppcoro::generator { @@ -1182,9 +1144,8 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - 
LimitOffsetClause{._limit = 1, ._offset = 1}, result, time); + LimitOffsetClause{._limit = 1, ._offset = 1}, result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1215,9 +1176,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result, time); + LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1252,9 +1212,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 3}, result, time); + LimitOffsetClause{._limit = 3}, result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1293,9 +1252,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 3, ._offset = 1}, result, time); + LimitOffsetClause{._limit = 3, ._offset = 1}, result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); @@ -1342,9 +1300,8 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, [](const auto&) { return false; }, [](auto) {}); - std::chrono::milliseconds time; auto generator = ExportQueryExecutionTrees::getRowIndices( - LimitOffsetClause{._limit = 5, ._offset = 2}, result, 
time); + LimitOffsetClause{._limit = 5, ._offset = 2}, result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); From 8acb05af3f11d332d58ed24048330a26ef10aab1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:41:01 +0200 Subject: [PATCH 095/133] Fix sonarqube issues --- src/engine/Operation.cpp | 2 +- src/engine/Operation.h | 2 +- src/engine/Result.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index e8912e3cd3..7d4618daf7 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -70,7 +70,7 @@ void Operation::recursivelySetTimeConstraint( } // _____________________________________________________________________________ -ProtoResult Operation::runComputation(ad_utility::Timer& timer, +ProtoResult Operation::runComputation(const ad_utility::Timer& timer, ComputationMode computationMode, bool isRoot) { checkCancellation(); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 07a67d5056..22e1c3a6fd 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -260,7 +260,7 @@ class Operation { //! Compute the result of the query-subtree rooted at this element.. 
virtual ProtoResult computeResult(bool requestLaziness) = 0; - ProtoResult runComputation(ad_utility::Timer& timer, + ProtoResult runComputation(const ad_utility::Timer& timer, ComputationMode computationMode, bool isRoot); CacheValue runComputationAndTransformToCache(ad_utility::Timer& timer, diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index c2e3be50e1..d8928d8479 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -179,7 +179,7 @@ void ProtoResult::runOnNewChunkComputed( ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; for (auto&& idTable : original) { function(idTable, timer.msecs()); - co_yield std::forward(idTable); + co_yield std::move(idTable); timer.start(); } }(std::move(storage_.idTables()), std::move(function)); From 3ee55102f0473d39667fbaf2f1a6aedbc8a0e89c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 00:04:49 +0200 Subject: [PATCH 096/133] Use const ref --- src/engine/Operation.cpp | 2 +- src/engine/Operation.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 7d4618daf7..b390ca76b7 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -138,7 +138,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // _____________________________________________________________________________ CacheValue Operation::runComputationAndTransformToCache( - ad_utility::Timer& timer, ComputationMode computationMode, + const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); auto result = Result::fromProtoResult( diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 22e1c3a6fd..5c60b13270 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -263,7 +263,7 @@ class Operation { ProtoResult runComputation(const 
ad_utility::Timer& timer, ComputationMode computationMode, bool isRoot); - CacheValue runComputationAndTransformToCache(ad_utility::Timer& timer, + CacheValue runComputationAndTransformToCache(const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, bool pinned, bool isRoot); From 72d1033cd3351a2dfc77c766c3d15452eb90c56e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 00:43:59 +0200 Subject: [PATCH 097/133] Apply microoptimization with mutex --- src/util/ConcurrentCache.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index b9e9e51799..0fab7fa82e 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -100,10 +100,11 @@ class ResultInProgress { // have called or will call getResult(). Check that none of the other threads // waiting for the result have already finished or were aborted. void finish(shared_ptr result) { - std::lock_guard lockGuard(_mutex); + std::unique_lock lockGuard(_mutex); AD_CONTRACT_CHECK(_status == Status::IN_PROGRESS); _status = Status::FINISHED; _result = std::move(result); + lockGuard.unlock(); _conditionVariable.notify_all(); } @@ -113,8 +114,9 @@ class ResultInProgress { // will terminate. 
void abort() { AD_CONTRACT_CHECK(_status == Status::IN_PROGRESS); - std::lock_guard lockGuard(_mutex); + std::unique_lock lockGuard(_mutex); _status = Status::ABORTED; + lockGuard.unlock(); _conditionVariable.notify_all(); } From 767a6cbd64cb8dab9060b6d0fcbe1b4df014231e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:07:51 +0200 Subject: [PATCH 098/133] Another correction for reported result sizes --- src/engine/ExportQueryExecutionTrees.cpp | 9 +++------ src/engine/Operation.cpp | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index d634b7f7b0..b016cd1e91 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -663,11 +663,6 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( qet.getResult(query._limitOffset._limit.has_value()); result->logResultSize(); - std::optional resultSize = - query.hasSelectClause() && result->isDataEvaluated() - ? std::optional{result->idTable().size()} - : std::nullopt; - nlohmann::json j; j["query"] = query._originalString; @@ -698,7 +693,9 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( auto timeResultComputation = timeUntilFunctionCall + runtimeInformation.totalTime_; - j["resultsize"] = resultSize.value_or(j["res"].size()); + size_t resultSize = runtimeInformation.numRows_; + + j["resultsize"] = query.hasSelectClause() ? 
resultSize : j["res"].size(); j["time"]["total"] = std::to_string(requestTimer.msecs().count()) + "ms"; j["time"]["computeResult"] = std::to_string(timeResultComputation.count()) + "ms"; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index b390ca76b7..045f65a176 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -107,7 +107,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, std::chrono::milliseconds duration) { runtimeInfo().totalTime_ += duration; runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); - runtimeInfo().numRows_ = idTable.numRows(); + runtimeInfo().numRows_ += idTable.numRows(); runtimeInfo().numCols_ = idTable.numColumns(); LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() << " x " << idTable.numColumns() << std::endl; From 8c834da64820d6d1988e98b16835d58b9c6a34c6 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 13:06:30 +0200 Subject: [PATCH 099/133] Rename member function --- src/engine/Operation.cpp | 6 +++--- src/engine/Operation.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 045f65a176..1b58e88399 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -137,7 +137,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, } // _____________________________________________________________________________ -CacheValue Operation::runComputationAndTransformToCache( +CacheValue Operation::runComputationAndPrepareForCache( const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); @@ -196,8 +196,8 @@ std::shared_ptr Operation::getResult( }); auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult, isRoot]() { - return 
runComputationAndTransformToCache(timer, computationMode, cacheKey, - pinResult, isRoot); + return runComputationAndPrepareForCache(timer, computationMode, cacheKey, + pinResult, isRoot); }; auto suitedForCache = [](const CacheValue& cacheValue) { diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 5c60b13270..5400506294 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -263,10 +263,10 @@ class Operation { ProtoResult runComputation(const ad_utility::Timer& timer, ComputationMode computationMode, bool isRoot); - CacheValue runComputationAndTransformToCache(const ad_utility::Timer& timer, - ComputationMode computationMode, - const std::string& cacheKey, - bool pinned, bool isRoot); + CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer, + ComputationMode computationMode, + const std::string& cacheKey, + bool pinned, bool isRoot); // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. 
From 4555daba513432d25ecc6e1d578d6b70dd8daa57 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:58:30 +0200 Subject: [PATCH 100/133] compute value directly after waiting without success --- src/engine/Operation.cpp | 3 ++- src/util/ConcurrentCache.h | 28 +++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 1b58e88399..1adeb5cbac 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -150,7 +150,8 @@ CacheValue Operation::runComputationAndPrepareForCache( pinned](Result aggregatedResult) { cache.tryInsertIfNotPresent( pinned, cacheKey, - CacheValue{std::move(aggregatedResult), *runtimeInfo}); + std::make_shared(std::move(aggregatedResult), + *runtimeInfo)); }); if (result.isDataEvaluated()) { auto resultNumRows = result.idTable().size(); diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 0fab7fa82e..68703b1f6a 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -208,15 +208,15 @@ class ConcurrentCache { suitedForCache); } - void tryInsertIfNotPresent(bool pinned, const Key& key, Value value) { + void tryInsertIfNotPresent(bool pinned, const Key& key, + std::shared_ptr value) { auto lockPtr = _cacheAndInProgressMap.wlock(); if (pinned) { if (!lockPtr->_cache.containsAndMakePinnedIfExists(key)) { - lockPtr->_cache.insertPinned(key, - std::make_shared(std::move(value))); + lockPtr->_cache.insertPinned(key, std::move(value)); } } else if (!lockPtr->_cache.contains(key)) { - lockPtr->_cache.insert(key, std::make_shared(std::move(value))); + lockPtr->_cache.insert(key, std::move(value)); } } @@ -385,11 +385,12 @@ class ConcurrentCache { shared_ptr result = make_shared(computeFunction()); if (suitedForCache(*result)) { moveFromInProgressToCache(key, result); + // Signal other threads who are waiting for the results. 
+ resultInProgress->finish(result); } else { _cacheAndInProgressMap.wlock()->_inProgress.erase(key); + resultInProgress->finish(nullptr); } - // Signal other threads who are waiting for the results. - resultInProgress->finish(result); // result was not cached return {std::move(result), CacheStatus::computed}; } catch (...) { @@ -404,16 +405,13 @@ class ConcurrentCache { // return the result, we do not count this case as "cached" as we had to // wait. auto resultPointer = resultInProgress->getResult(); - if (resultPointer) { - return {std::move(resultPointer), CacheStatus::computed}; + if (!resultPointer) { + // Fallback computation + auto mutablePointer = make_shared(computeFunction()); + tryInsertIfNotPresent(pinned, key, mutablePointer); + resultPointer = std::move(mutablePointer); } - // TODO there's a small chance this will hang indefinitely if - // other processes keep submitting non-cacheable entries before this - // thread can acquire the lock to compute the entry on its own. - - // Retry if computed entry unsuited for caching - return computeOnceImpl(pinned, key, computeFunction, false, - suitedForCache); + return {std::move(resultPointer), CacheStatus::computed}; } } From d8f7fc19eb332f3f7fbd5e0c8ef204dfa2b9258e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 20:03:45 +0200 Subject: [PATCH 101/133] Check size before aggregating id tables --- src/engine/Operation.cpp | 9 +++++++-- src/engine/Result.cpp | 27 +++++++++++++------------- src/engine/Result.h | 2 +- test/ExportQueryExecutionTreesTest.cpp | 14 ++++++------- test/SparqlDataTypesTest.cpp | 4 ++-- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 1adeb5cbac..5b0000fb0c 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -143,8 +143,13 @@ CacheValue Operation::runComputationAndPrepareForCache( auto& cache = _executionContext->getQueryTreeCache(); auto 
result = Result::fromProtoResult( runComputation(timer, computationMode, isRoot), - [&cache](const IdTable& idTable) { - return cache.getMaxSizeSingleEntry() >= CacheValue::getSize(idTable); + [&cache](const std::optional& currentIdTable, + const IdTable& newIdTable) { + auto currentSize = currentIdTable.has_value() + ? CacheValue::getSize(currentIdTable.value()) + : 0_B; + return cache.getMaxSizeSingleEntry() >= + currentSize + CacheValue::getSize(newIdTable); }, [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey, pinned](Result aggregatedResult) { diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index d8928d8479..a9a606a457 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -241,9 +241,11 @@ Result::Result(cppcoro::generator idTables, std::move(localVocab)}} {} // _____________________________________________________________________________ -Result Result::fromProtoResult(ProtoResult protoResult, - std::function fitsInCache, - std::function storeInCache) { +Result Result::fromProtoResult( + ProtoResult protoResult, + std::function&, const IdTable&)> + fitInCache, + std::function storeInCache) { if (protoResult.isDataEvaluated()) { return Result{std::move(protoResult.storage_.idTable()), std::move(protoResult.storage_.sortedBy_), @@ -254,18 +256,17 @@ Result Result::fromProtoResult(ProtoResult protoResult, return Result{ ad_utility::wrapGeneratorWithCache( std::move(protoResult.storage_.idTables()), - [fitsInCache = std::move(fitsInCache)]( + [fitInCache = std::move(fitInCache)]( std::optional& aggregate, const IdTable& newTable) { - if (aggregate.has_value()) { - aggregate.value().insertAtEnd(newTable); - } else { - aggregate.emplace(newTable.clone()); + bool doBothFitInCache = fitInCache(aggregate, newTable); + if (doBothFitInCache) { + if (aggregate.has_value()) { + aggregate.value().insertAtEnd(newTable); + } else { + aggregate.emplace(newTable.clone()); + } } - // TODO Review question: Should we compute the sizes - // 
individually and add the result together to then check the size - // at the cost of a more complex/less generic interface to avoid - // filling up memory that might be deallocated soon after. - return fitsInCache(aggregate.value()); + return doBothFitInCache; }, [storeInCache = std::move(storeInCache), sortedByCopy = std::move(sortedByCopy), diff --git a/src/engine/Result.h b/src/engine/Result.h index 10599e09d7..8173f0aa0e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -197,7 +197,7 @@ class Result { Result& operator=(Result&& other) = default; static Result fromProtoResult(ProtoResult protoResult, - std::function fitsInCache, + std::function&, const IdTable&)> fitInCache, std::function storeInCache); // Const access to the underlying `IdTable`. diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 06b5ebff89..29069a2590 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1078,7 +1078,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { Result result = Result::fromProtoResult( ProtoResult{std::move(idTable), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1110,7 +1110,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1143,7 +1143,7 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) 
{ return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1175,7 +1175,7 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); @@ -1211,7 +1211,7 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1251,7 +1251,7 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1299,7 +1299,7 @@ TEST(ExportQueryExecutionTrees, Result result = Result::fromProtoResult( ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index d7b3602ab4..9facd81408 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -19,7 +19,7 @@ struct ContextWrapper { Result _resultTable = 
Result::fromProtoResult( ProtoResult{ IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); // TODO `VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -31,7 +31,7 @@ struct ContextWrapper { void setIdTable(IdTable&& table) { _resultTable = Result::fromProtoResult( ProtoResult{std::move(table), {}, _resultTable.getSharedLocalVocab()}, - [](const auto&) { return false; }, [](auto) {}); + [](const auto&, const auto&) { return false; }, [](auto) {}); } }; From bbe10ef263d1bc1620107856e242eacf8dbb0c85 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 20:57:53 +0200 Subject: [PATCH 102/133] Merge Result and ProtoResult back together --- src/engine/Operation.cpp | 4 +- src/engine/Result.cpp | 186 +++++++++++-------------- src/engine/Result.h | 166 +++++++--------------- test/ExportQueryExecutionTreesTest.cpp | 28 +--- test/SparqlDataTypesTest.cpp | 11 +- 5 files changed, 143 insertions(+), 252 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 5b0000fb0c..bd38ac1d7e 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -141,8 +141,8 @@ CacheValue Operation::runComputationAndPrepareForCache( const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); - auto result = Result::fromProtoResult( - runComputation(timer, computationMode, isRoot), + auto result = runComputation(timer, computationMode, isRoot); + result.cacheDuringConsumption( [&cache](const std::optional& currentIdTable, const IdTable& newIdTable) { auto currentSize = currentIdTable.has_value() diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index a9a606a457..d5e4422345 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -29,54 +29,54 @@ 
void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { } // _____________________________________________________________________________ -ProtoResult::ProtoResult(IdTable idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : storage_{StorageType{std::move(idTable), std::move(sortedBy), - std::move(localVocab.localVocab_)}} { - AD_CONTRACT_CHECK(storage_.localVocab_ != nullptr); - validateIdTable(storage_.idTable(), storage_.sortedBy_); +Result::Result(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : data_{std::move(idTable)}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab.localVocab_)} { + AD_CONTRACT_CHECK(localVocab_ != nullptr); + validateIdTable(this->idTable(), sortedBy_); } // _____________________________________________________________________________ -ProtoResult::ProtoResult(IdTable idTable, std::vector sortedBy, - LocalVocab&& localVocab) - : ProtoResult{std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab) + : Result{std::move(idTable), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} // _____________________________________________________________________________ -ProtoResult::ProtoResult(cppcoro::generator idTables, - std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : storage_{ - StorageType{[](auto idTables, - auto sortedBy) mutable -> cppcoro::generator { - for (IdTable& idTable : idTables) { - validateIdTable(idTable, sortedBy); - co_yield std::move(idTable); - } - }(std::move(idTables), sortedBy), - std::move(sortedBy), std::move(localVocab.localVocab_)}} { - AD_CONTRACT_CHECK(storage_.localVocab_ != nullptr); +Result::Result(cppcoro::generator idTables, + std::vector sortedBy, + SharedLocalVocabWrapper localVocab) + : data_{[](auto idTables, + auto sortedBy) mutable -> cppcoro::generator { + for 
(IdTable& idTable : idTables) { + validateIdTable(idTable, sortedBy); + co_yield std::move(idTable); + } + }(std::move(idTables), sortedBy)}, + sortedBy_{std::move(sortedBy)}, + localVocab_{std::move(localVocab.localVocab_)} { + AD_CONTRACT_CHECK(localVocab_ != nullptr); } // _____________________________________________________________________________ -ProtoResult::ProtoResult(cppcoro::generator idTables, - std::vector sortedBy, - LocalVocab&& localVocab) - : ProtoResult{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocab&& localVocab) + : Result{std::move(idTables), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}} {} // _____________________________________________________________________________ -void ProtoResult::applyLimitOffset( +void Result::applyLimitOffset( const LimitOffsetClause& limitOffset, std::function limitTimeCallback) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
AD_CONTRACT_CHECK(limitTimeCallback); - if (storage_.isDataEvaluated()) { + if (isDataEvaluated()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - modifyIdTable(storage_.idTable(), limitOffset); + modifyIdTable(std::get(data_), limitOffset); limitTimeCallback(limitTimer.msecs()); } else { auto generator = @@ -104,15 +104,14 @@ void ProtoResult::applyLimitOffset( break; } } - }(std::move(storage_.idTables()), limitOffset, - std::move(limitTimeCallback)); - storage_.idTables() = std::move(generator); + }(std::move(idTables()), limitOffset, std::move(limitTimeCallback)); + data_ = std::move(generator); } } // _____________________________________________________________________________ -void ProtoResult::enforceLimitOffset(const LimitOffsetClause& limitOffset) { - if (storage_.isDataEvaluated()) { +void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { + if (isDataEvaluated()) { uint64_t numRows = idTable().numRows(); auto limit = limitOffset._limit; AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); @@ -128,13 +127,13 @@ void ProtoResult::enforceLimitOffset(const LimitOffsetClause& limitOffset) { co_yield std::move(idTable); } AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); - }(std::move(storage_.idTables()), limitOffset); - storage_.idTables() = std::move(generator); + }(std::move(idTables()), limitOffset); + data_ = std::move(generator); } } // _____________________________________________________________ -void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { +void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto performCheck = [](const auto& map, IdTable& idTable) { DatatypeCountsPerColumn datatypeCountsPerColumn = computeDatatypeCountsPerColumn(idTable); @@ -148,7 +147,7 @@ void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { }); }; if (isDataEvaluated()) { - AD_EXPENSIVE_CHECK(performCheck(varColMap, 
storage_.idTable())); + AD_EXPENSIVE_CHECK(performCheck(varColMap, std::get(data_))); } else { auto generator = [](cppcoro::generator original, VariableToColumnMap varColMap, @@ -163,15 +162,15 @@ void ProtoResult::checkDefinedness(const VariableToColumnMap& varColMap) { } co_yield std::move(idTable); } - }(std::move(storage_.idTables()), varColMap, std::move(performCheck)); - storage_.idTables() = std::move(generator); + }(std::move(idTables()), varColMap, std::move(performCheck)); + data_ = std::move(generator); } } // _____________________________________________________________________________ -void ProtoResult::runOnNewChunkComputed( +void Result::runOnNewChunkComputed( std::function function) { - AD_CONTRACT_CHECK(!storage_.isDataEvaluated()); + AD_CONTRACT_CHECK(!isDataEvaluated()); auto generator = [](cppcoro::generator original, std::function @@ -182,12 +181,12 @@ void ProtoResult::runOnNewChunkComputed( co_yield std::move(idTable); timer.start(); } - }(std::move(storage_.idTables()), std::move(function)); - storage_.idTables() = std::move(generator); + }(std::move(idTables()), std::move(function)); + data_ = std::move(generator); } // _____________________________________________________________________________ -auto ProtoResult::computeDatatypeCountsPerColumn(IdTable& idTable) +auto Result::computeDatatypeCountsPerColumn(IdTable& idTable) -> DatatypeCountsPerColumn { DatatypeCountsPerColumn types; types.resize(idTable.numColumns()); @@ -202,8 +201,8 @@ auto ProtoResult::computeDatatypeCountsPerColumn(IdTable& idTable) } // _____________________________________________________________________________ -void ProtoResult::validateIdTable(const IdTable& idTable, - const std::vector& sortedBy) { +void Result::validateIdTable(const IdTable& idTable, + const std::vector& sortedBy) { AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy, [&idTable](size_t numCols) { return numCols < idTable.numColumns(); })); @@ -221,70 +220,50 @@ void ProtoResult::validateIdTable(const 
IdTable& idTable, } // _____________________________________________________________________________ -const IdTable& ProtoResult::idTable() const { return storage_.idTable(); } - -// _____________________________________________________________________________ -bool ProtoResult::isDataEvaluated() const noexcept { - return storage_.isDataEvaluated(); +const IdTable& Result::idTable() const { + AD_CONTRACT_CHECK(isDataEvaluated()); + return std::get(data_); } // _____________________________________________________________________________ -Result::Result(IdTable idTable, std::vector sortedBy, - LocalVocabPtr localVocab) - : storage_{StorageType{std::move(idTable), std::move(sortedBy), - std::move(localVocab)}} {} +cppcoro::generator& Result::idTables() const { + AD_CONTRACT_CHECK(!isDataEvaluated()); + return std::get>(data_); +} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab) - : storage_{StorageType{std::move(idTables), std::move(sortedBy), - std::move(localVocab)}} {} +bool Result::isDataEvaluated() const noexcept { + return std::holds_alternative(data_); +} // _____________________________________________________________________________ -Result Result::fromProtoResult( - ProtoResult protoResult, +void Result::cacheDuringConsumption( std::function&, const IdTable&)> fitInCache, std::function storeInCache) { - if (protoResult.isDataEvaluated()) { - return Result{std::move(protoResult.storage_.idTable()), - std::move(protoResult.storage_.sortedBy_), - std::move(protoResult.storage_.localVocab_)}; + if (isDataEvaluated()) { + return; } - auto sortedByCopy = protoResult.storage_.sortedBy_; - auto localVocabReference = protoResult.storage_.localVocab_; - return Result{ - ad_utility::wrapGeneratorWithCache( - std::move(protoResult.storage_.idTables()), - [fitInCache = std::move(fitInCache)]( - std::optional& aggregate, const IdTable& newTable) 
{ - bool doBothFitInCache = fitInCache(aggregate, newTable); - if (doBothFitInCache) { - if (aggregate.has_value()) { - aggregate.value().insertAtEnd(newTable); - } else { - aggregate.emplace(newTable.clone()); - } - } - return doBothFitInCache; - }, - [storeInCache = std::move(storeInCache), - sortedByCopy = std::move(sortedByCopy), - localVocabReference = - std::move(localVocabReference)](IdTable idTable) mutable { - storeInCache(Result{std::move(idTable), std::move(sortedByCopy), - std::move(localVocabReference)}); - }), - std::move(protoResult.storage_.sortedBy_), - std::move(protoResult.storage_.localVocab_)}; -} -// _____________________________________________________________________________ -const IdTable& Result::idTable() const { return storage_.idTable(); } - -// _____________________________________________________________________________ -cppcoro::generator& Result::idTables() const { - return storage_.idTables(); + data_ = ad_utility::wrapGeneratorWithCache( + std::move(idTables()), + [fitInCache = std::move(fitInCache)](std::optional& aggregate, + const IdTable& newTable) { + bool doBothFitInCache = fitInCache(aggregate, newTable); + if (doBothFitInCache) { + if (aggregate.has_value()) { + aggregate.value().insertAtEnd(newTable); + } else { + aggregate.emplace(newTable.clone()); + } + } + return doBothFitInCache; + }, + [storeInCache = std::move(storeInCache), sortedBy = sortedBy_, + localVocab = localVocab_](IdTable idTable) mutable { + storeInCache(Result{std::move(idTable), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(localVocab)}}); + }); } // _____________________________________________________________________________ @@ -297,11 +276,6 @@ auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) // _____________________________________________________________________________ LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } -// 
_____________________________________________________________________________ -bool Result::isDataEvaluated() const noexcept { - return storage_.isDataEvaluated(); -} - // _____________________________________________________________________________ void Result::logResultSize() const { if (isDataEvaluated()) { diff --git a/src/engine/Result.h b/src/engine/Result.h index 8173f0aa0e..99e25fef3e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -17,14 +17,16 @@ #include "parser/data/LimitOffsetClause.h" #include "util/CacheableGenerator.h" -template -class ResultStorage { - friend class ProtoResult; - friend class Result; - - using Data = std::variant; - // The actual entries. - Data data_; +// The result of an `Operation`. This is the class QLever uses for all +// intermediate or final results when processing a SPARQL query. The actual data +// is either a table and contained in the member `idTable()` or can be consumed +// through a generator via `idTables()` when it is supposed to be lazily +// evaluated. +class Result { + private: + using Data = std::variant>; + // The actual entries. Needs to be mutable in order to consume a const entry. + mutable Data data_; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. 
@@ -34,42 +36,6 @@ class ResultStorage { std::shared_ptr localVocab_ = std::make_shared(); - ResultStorage(Data data, std::vector sortedBy, - std::shared_ptr localVocab) - : data_{std::move(data)}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab)} {} - - bool isDataEvaluated() const noexcept { - return std::holds_alternative(data_); - } - - IdTableType& idTable() { - AD_CONTRACT_CHECK(isDataEvaluated()); - return std::get(data_); - } - - const IdTableType& idTable() const { - AD_CONTRACT_CHECK(isDataEvaluated()); - return std::get(data_); - } - - GeneratorType& idTables() { - AD_CONTRACT_CHECK(!isDataEvaluated()); - return std::get(data_); - } - - const GeneratorType& idTables() const { - AD_CONTRACT_CHECK(!isDataEvaluated()); - return std::get(data_); - } -}; - -class ProtoResult { - friend class Result; - using StorageType = ResultStorage>; - StorageType storage_; - using LocalVocabPtr = std::shared_ptr; // Note: If additional members and invariants are added to the class (for @@ -91,7 +57,6 @@ class ProtoResult { std::shared_ptr localVocab_; explicit SharedLocalVocabWrapper(LocalVocabPtr localVocab) : localVocab_{std::move(localVocab)} {} - friend ProtoResult; friend class Result; public: @@ -103,6 +68,20 @@ class ProtoResult { std::make_shared(std::move(localVocab))} {} }; + // For each column in the result (the entries in the outer `vector`) and for + // each `Datatype` (the entries of the inner `array`), store the information + // how many entries of that datatype are stored in the column. + using DatatypeCountsPerColumn = std::vector< + std::array(Datatype::MaxValue) + 1>>; + + // Get the information, which columns stores how many entries of each + // datatype. 
+ static DatatypeCountsPerColumn computeDatatypeCountsPerColumn( + IdTable& idTable); + + static void validateIdTable(const IdTable& idTable, + const std::vector& sortedBy); + public: // Construct from the given arguments (see above) and check the following // invariants: `localVocab` must not be `nullptr` and each entry of `sortedBy` @@ -113,28 +92,21 @@ class ProtoResult { // The first overload of the constructor is for local vocabs that are shared // with another `Result` via the `getSharedLocalVocab...` methods below. // The second overload is for newly created local vocabularies. - ProtoResult(IdTable idTable, std::vector sortedBy, - SharedLocalVocabWrapper localVocab); - ProtoResult(IdTable idTable, std::vector sortedBy, - LocalVocab&& localVocab); - ProtoResult(cppcoro::generator idTables, - std::vector sortedBy, - SharedLocalVocabWrapper localVocab); - ProtoResult(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab); - - public: - ProtoResult(const ProtoResult& other) = delete; - ProtoResult& operator=(const ProtoResult& other) = delete; - - ProtoResult(ProtoResult&& other) = default; - ProtoResult& operator=(ProtoResult&& other) = default; + Result(IdTable idTable, std::vector sortedBy, + SharedLocalVocabWrapper localVocab); + Result(IdTable idTable, std::vector sortedBy, + LocalVocab&& localVocab); + Result(cppcoro::generator idTables, + std::vector sortedBy, SharedLocalVocabWrapper localVocab); + Result(cppcoro::generator idTables, + std::vector sortedBy, LocalVocab&& localVocab); + // Prevent accidental copying of a result table. + Result(const Result& other) = delete; + Result& operator=(const Result& other) = delete; - // For each column in the result (the entries in the outer `vector`) and for - // each `Datatype` (the entries of the inner `array`), store the information - // how many entries of that datatype are stored in the column. 
- using DatatypeCountsPerColumn = std::vector< - std::array(Datatype::MaxValue) + 1>>; + // Moving of a result table is OK. + Result(Result&& other) = default; + Result& operator=(Result&& other) = default; // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. // Note: If additional members and invariants are added to the class (for @@ -155,50 +127,10 @@ class ProtoResult { void runOnNewChunkComputed( std::function function); - private: - // Get the information, which columns stores how many entries of each - // datatype. - static DatatypeCountsPerColumn computeDatatypeCountsPerColumn( - IdTable& idTable); - - static void validateIdTable(const IdTable& idTable, - const std::vector& sortedBy); - - public: - const IdTable& idTable() const; - - bool isDataEvaluated() const noexcept; -}; - -// The result of an `Operation`. This is the class QLever uses for all -// intermediate or final results when processing a SPARQL query. The actual data -// is always a table and contained in the member `idTable()`. -class Result { - private: - using StorageType = ResultStorage>; - mutable StorageType storage_; - - using LocalVocabPtr = std::shared_ptr; - - using SharedLocalVocabWrapper = ProtoResult::SharedLocalVocabWrapper; - - Result(IdTable idTable, std::vector sortedBy, - LocalVocabPtr localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab); - - public: - // Prevent accidental copying of a result table. - Result(const Result& other) = delete; - Result& operator=(const Result& other) = delete; - - // Moving of a result table is OK. - Result(Result&& other) = default; - Result& operator=(Result&& other) = default; - - static Result fromProtoResult(ProtoResult protoResult, - std::function&, const IdTable&)> fitInCache, - std::function storeInCache); + void cacheDuringConsumption( + std::function&, const IdTable&)> + fitInCache, + std::function storeInCache); // Const access to the underlying `IdTable`. 
const IdTable& idTable() const; @@ -207,9 +139,7 @@ class Result { cppcoro::generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. - const std::vector& sortedBy() const { - return storage_.sortedBy_; - } + const std::vector& sortedBy() const { return sortedBy_; } // Get the local vocabulary of this result, used for lookup only. // @@ -222,12 +152,12 @@ class Result { // Filter::computeFilterImpl (evaluationContext) // Variable::evaluate (idToStringAndType) // - const LocalVocab& localVocab() const { return *storage_.localVocab_; } + const LocalVocab& localVocab() const { return *localVocab_; } // Get the local vocab as a shared pointer to const. This can be used if one // result has the same local vocab as one of its child results. SharedLocalVocabWrapper getSharedLocalVocab() const { - return SharedLocalVocabWrapper{storage_.localVocab_}; + return SharedLocalVocabWrapper{localVocab_}; } // Like `getSharedLocalVocabFrom`, but takes more than one result and merges @@ -241,7 +171,7 @@ class Result { static SharedLocalVocabWrapper getMergedLocalVocab(R&& subResults) { std::vector vocabs; for (const Result& table : subResults) { - vocabs.push_back(std::to_address(table.storage_.localVocab_)); + vocabs.push_back(std::to_address(table.localVocab_)); } return SharedLocalVocabWrapper{LocalVocab::merge(vocabs)}; } @@ -262,3 +192,7 @@ class Result { // The first rows of the result and its total size (for debugging). string asDebugString() const; }; + +// Class alias to conceptually differentiate between Results that produce +// values and Results meant to be consumed. 
+using ProtoResult = Result; diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 29069a2590..5629137de6 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1076,9 +1076,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { idTable.push_back({Id::makeFromInt(42)}); idTable.push_back({Id::makeFromInt(1337)}); - Result result = Result::fromProtoResult( - ProtoResult{std::move(idTable), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(idTable), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1108,9 +1106,7 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { co_yield std::move(idTable2); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); @@ -1141,9 +1137,7 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { co_yield std::move(idTable1); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1173,9 +1167,7 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = 
ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); @@ -1209,9 +1201,7 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1249,9 +1239,7 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable2); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1297,9 +1285,7 @@ TEST(ExportQueryExecutionTrees, co_yield std::move(idTable3); }(); - Result result = Result::fromProtoResult( - ProtoResult{std::move(tableGenerator), {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp index 9facd81408..a84e39f8d4 100644 --- a/test/SparqlDataTypesTest.cpp +++ b/test/SparqlDataTypesTest.cpp @@ -16,10 +16,8 @@ using enum PositionInTriple; namespace { struct ContextWrapper { Index _index{ad_utility::makeUnlimitedAllocator()}; - Result _resultTable = Result::fromProtoResult( - ProtoResult{ - IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + Result _resultTable{ + IdTable{ad_utility::testing::makeAllocator()}, {}, LocalVocab{}}; // TODO 
`VariableToColumnMap` VariableToColumnMap _hashMap{}; @@ -29,9 +27,8 @@ struct ContextWrapper { } void setIdTable(IdTable&& table) { - _resultTable = Result::fromProtoResult( - ProtoResult{std::move(table), {}, _resultTable.getSharedLocalVocab()}, - [](const auto&, const auto&) { return false; }, [](auto) {}); + _resultTable = + Result{std::move(table), {}, _resultTable.getSharedLocalVocab()}; } }; From 345d79c5c0017cff15fc00e3f9ca4e993ae7da8c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:14:46 +0200 Subject: [PATCH 103/133] Re-arrange functions to make diff smaller --- src/engine/Result.cpp | 102 +++++++++++++++++++++--------------------- src/engine/Result.h | 39 ++++++++-------- 2 files changed, 70 insertions(+), 71 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index d5e4422345..921e1703a6 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -13,21 +13,28 @@ #include "util/Timer.h" // _____________________________________________________________________________ -void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { - std::ranges::for_each( - idTable.getColumns(), - [offset = limitOffset.actualOffset(idTable.numRows()), - upperBound = - limitOffset.upperBound(idTable.numRows())](std::span column) { - std::shift_left(column.begin(), column.begin() + upperBound, offset); - }); - // Resize the `IdTable` if necessary. 
- size_t targetSize = limitOffset.actualSize(idTable.numRows()); - AD_CORRECTNESS_CHECK(targetSize <= idTable.numRows()); - idTable.resize(targetSize); - idTable.shrinkToFit(); +string Result::asDebugString() const { + std::ostringstream os; + os << "First (up to) 5 rows of result with size:\n"; + for (size_t i = 0; i < std::min(5, idTable().size()); ++i) { + for (size_t j = 0; j < idTable().numColumns(); ++j) { + os << idTable()(i, j) << '\t'; + } + os << '\n'; + } + return std::move(os).str(); } +// _____________________________________________________________________________ +auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) + -> SharedLocalVocabWrapper { + return getMergedLocalVocab( + std::array{std::cref(result1), std::cref(result2)}); +} + +// _____________________________________________________________________________ +LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } + // _____________________________________________________________________________ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab) @@ -66,6 +73,22 @@ Result::Result(cppcoro::generator idTables, : Result{std::move(idTables), std::move(sortedBy), SharedLocalVocabWrapper{std::move(localVocab)}} {} +// _____________________________________________________________________________ +void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { + std::ranges::for_each( + idTable.getColumns(), + [offset = limitOffset.actualOffset(idTable.numRows()), + upperBound = + limitOffset.upperBound(idTable.numRows())](std::span column) { + std::shift_left(column.begin(), column.begin() + upperBound, offset); + }); + // Resize the `IdTable` if necessary. 
+ size_t targetSize = limitOffset.actualSize(idTable.numRows()); + AD_CORRECTNESS_CHECK(targetSize <= idTable.numRows()); + idTable.resize(targetSize); + idTable.shrinkToFit(); +} + // _____________________________________________________________________________ void Result::applyLimitOffset( const LimitOffsetClause& limitOffset, @@ -132,6 +155,21 @@ void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { } } +// _____________________________________________________________________________ +auto Result::computeDatatypeCountsPerColumn(IdTable& idTable) + -> DatatypeCountsPerColumn { + DatatypeCountsPerColumn types; + types.resize(idTable.numColumns()); + for (size_t i = 0; i < idTable.numColumns(); ++i) { + const auto& col = idTable.getColumn(i); + auto& datatypes = types.at(i); + for (Id id : col) { + ++datatypes[static_cast(id.getDatatype())]; + } + } + return types; +} + // _____________________________________________________________ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto performCheck = [](const auto& map, IdTable& idTable) { @@ -185,21 +223,6 @@ void Result::runOnNewChunkComputed( data_ = std::move(generator); } -// _____________________________________________________________________________ -auto Result::computeDatatypeCountsPerColumn(IdTable& idTable) - -> DatatypeCountsPerColumn { - DatatypeCountsPerColumn types; - types.resize(idTable.numColumns()); - for (size_t i = 0; i < idTable.numColumns(); ++i) { - const auto& col = idTable.getColumn(i); - auto& datatypes = types.at(i); - for (Id id : col) { - ++datatypes[static_cast(id.getDatatype())]; - } - } - return types; -} - // _____________________________________________________________________________ void Result::validateIdTable(const IdTable& idTable, const std::vector& sortedBy) { @@ -266,16 +289,6 @@ void Result::cacheDuringConsumption( }); } -// _____________________________________________________________________________ -auto 
Result::getMergedLocalVocab(const Result& result1, const Result& result2) - -> SharedLocalVocabWrapper { - return getMergedLocalVocab( - std::array{std::cref(result1), std::cref(result2)}); -} - -// _____________________________________________________________________________ -LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } - // _____________________________________________________________________________ void Result::logResultSize() const { if (isDataEvaluated()) { @@ -285,16 +298,3 @@ void Result::logResultSize() const { LOG(INFO) << "Result has unknown size (not computed yet)" << std::endl; } } - -// _____________________________________________________________________________ -string Result::asDebugString() const { - std::ostringstream os; - os << "First (up to) 5 rows of result with size:\n"; - for (size_t i = 0; i < std::min(5, idTable().size()); ++i) { - for (size_t j = 0; j < idTable().numColumns(); ++j) { - os << idTable()(i, j) << '\t'; - } - os << '\n'; - } - return std::move(os).str(); -} diff --git a/src/engine/Result.h b/src/engine/Result.h index 99e25fef3e..c7f58ad636 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -32,12 +32,11 @@ class Result { // Empty if the result is not sorted on any column. std::vector sortedBy_; - // The local vocabulary of the result. - std::shared_ptr localVocab_ = - std::make_shared(); - using LocalVocabPtr = std::shared_ptr; + // The local vocabulary of the result. + LocalVocabPtr localVocab_ = std::make_shared(); + // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those remain valid after calling non-const function like @@ -108,22 +107,6 @@ class Result { Result(Result&& other) = default; Result& operator=(Result&& other) = default; - // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. 
- // Note: If additional members and invariants are added to the class (for - // example information about the datatypes in each column) make sure that - // those are still correct after performing this operation. - void applyLimitOffset( - const LimitOffsetClause& limitOffset, - std::function limitTimeCallback); - - void enforceLimitOffset(const LimitOffsetClause& limitOffset); - - // Check that if the `varColMap` guarantees that a column is always defined - // (i.e. that is contains no single undefined value) that there are indeed no - // undefined values in the `data_` of this result. Return `true` iff the - // check is successful. - void checkDefinedness(const VariableToColumnMap& varColMap); - void runOnNewChunkComputed( std::function function); @@ -191,6 +174,22 @@ class Result { // The first rows of the result and its total size (for debugging). string asDebugString() const; + + // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. + // Note: If additional members and invariants are added to the class (for + // example information about the datatypes in each column) make sure that + // those are still correct after performing this operation. + void applyLimitOffset( + const LimitOffsetClause& limitOffset, + std::function limitTimeCallback); + + void enforceLimitOffset(const LimitOffsetClause& limitOffset); + + // Check that if the `varColMap` guarantees that a column is always defined + // (i.e. that is contains no single undefined value) that there are indeed no + // undefined values in the `data_` of this result. Return `true` iff the + // check is successful. 
+ void checkDefinedness(const VariableToColumnMap& varColMap); }; // Class alias to conceptually differentiate between Results that produce From a101433a35f80c2da58f136235f85f7d8ad62aa5 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:57:20 +0200 Subject: [PATCH 104/133] Fix bugs with limit and optimize iteration --- src/engine/ExportQueryExecutionTrees.cpp | 208 ++++++++++++----------- src/engine/ExportQueryExecutionTrees.h | 6 +- test/ExportQueryExecutionTreesTest.cpp | 108 +++++++++--- 3 files changed, 198 insertions(+), 124 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index b016cd1e91..02f4cdf6c0 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -28,20 +28,26 @@ cppcoro::generator ExportQueryExecutionTrees::getIdTables( // Return a range that contains the indices of the rows that have to be exported // from the `idTable` given the `LimitOffsetClause`. 
It takes into account the // LIMIT, the OFFSET, and the actual size of the `idTable` -cppcoro::generator +cppcoro::generator ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, const Result& result) { + if (limitOffset._limit.value_or(1) == 0) { + co_return; + } for (const IdTable& idTable : getIdTables(result)) { uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); - for (size_t index = currentOffset; index < upperBound; index++) { - co_yield {index, idTable}; + if (currentOffset != upperBound) { + co_yield {idTable, std::views::iota(currentOffset, upperBound)}; } limitOffset._offset -= currentOffset; if (limitOffset._limit.has_value()) { limitOffset._limit = limitOffset._limit.value() - (upperBound - currentOffset); } + if (limitOffset._limit.value_or(1) == 0) { + break; + } } } @@ -52,22 +58,24 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { - ConstructQueryExportContext context{i, idTable, result->localVocab(), - qet.getVariableColumns(), - qet.getQec()->getIndex()}; - using enum PositionInTriple; - for (const auto& triple : constructTriples) { - auto subject = triple[0].evaluate(context, SUBJECT); - auto predicate = triple[1].evaluate(context, PREDICATE); - auto object = triple[2].evaluate(context, OBJECT); - if (!subject.has_value() || !predicate.has_value() || - !object.has_value()) { - continue; + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t i : range) { + ConstructQueryExportContext context{i, idTable, result->localVocab(), + qet.getVariableColumns(), + qet.getQec()->getIndex()}; + using enum PositionInTriple; + for (const auto& triple : constructTriples) { + auto subject = 
triple[0].evaluate(context, SUBJECT); + auto predicate = triple[1].evaluate(context, PREDICATE); + auto object = triple[2].evaluate(context, OBJECT); + if (!subject.has_value() || !predicate.has_value() || + !object.has_value()) { + continue; + } + co_yield {std::move(subject.value()), std::move(predicate.value()), + std::move(object.value())}; + cancellationHandle->throwIfCancelled(); } - co_yield {std::move(subject.value()), std::move(predicate.value()), - std::move(object.value())}; - cancellationHandle->throwIfCancelled(); } } } @@ -135,31 +143,33 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( AD_CORRECTNESS_CHECK(result != nullptr); nlohmann::json json = nlohmann::json::array(); - for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { - // We need the explicit `array` constructor for the special case of zero - // variables. - json.push_back(nlohmann::json::array()); - auto& row = json.back(); - for (const auto& opt : columns) { - if (!opt) { - row.emplace_back(nullptr); - continue; - } - const auto& currentId = idTable(rowIndex, opt->columnIndex_); - const auto& optionalStringAndXsdType = idToStringAndType( - qet.getQec()->getIndex(), currentId, result->localVocab()); - if (!optionalStringAndXsdType.has_value()) { - row.emplace_back(nullptr); - continue; - } - const auto& [stringValue, xsdType] = optionalStringAndXsdType.value(); - if (xsdType) { - row.emplace_back('"' + stringValue + "\"^^<" + xsdType + '>'); - } else { - row.emplace_back(stringValue); + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t rowIndex : range) { + // We need the explicit `array` constructor for the special case of zero + // variables. 
+ json.push_back(nlohmann::json::array()); + auto& row = json.back(); + for (const auto& opt : columns) { + if (!opt) { + row.emplace_back(nullptr); + continue; + } + const auto& currentId = idTable(rowIndex, opt->columnIndex_); + const auto& optionalStringAndXsdType = idToStringAndType( + qet.getQec()->getIndex(), currentId, result->localVocab()); + if (!optionalStringAndXsdType.has_value()) { + row.emplace_back(nullptr); + continue; + } + const auto& [stringValue, xsdType] = optionalStringAndXsdType.value(); + if (xsdType) { + row.emplace_back('"' + stringValue + "\"^^<" + xsdType + '>'); + } else { + row.emplace_back(stringValue); + } } + cancellationHandle->throwIfCancelled(); } - cancellationHandle->throwIfCancelled(); } return json; } @@ -375,32 +385,34 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (auto [rowIndex, idTable] : getRowIndices(limitAndOffset, *result)) { - // TODO: ordered_json` entries are ordered alphabetically, but insertion - // order would be preferable. - nlohmann::ordered_json binding; - for (const auto& column : columns) { - const auto& currentId = idTable(rowIndex, column->columnIndex_); - const auto& optionalValue = idToStringAndType( - qet.getQec()->getIndex(), currentId, result->localVocab()); - if (!optionalValue.has_value()) { - continue; - } - const auto& [stringValue, xsdType] = optionalValue.value(); - nlohmann::ordered_json b; - if (!xsdType) { - // No xsdType, this means that `stringValue` is a plain string literal - // or entity. - b = stringToBinding(stringValue); - } else { - b["value"] = stringValue; - b["type"] = "literal"; - b["datatype"] = xsdType; + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t rowIndex : range) { + // TODO: ordered_json` entries are ordered alphabetically, but insertion + // order would be preferable. 
+ nlohmann::ordered_json binding; + for (const auto& column : columns) { + const auto& currentId = idTable(rowIndex, column->columnIndex_); + const auto& optionalValue = idToStringAndType( + qet.getQec()->getIndex(), currentId, result->localVocab()); + if (!optionalValue.has_value()) { + continue; + } + const auto& [stringValue, xsdType] = optionalValue.value(); + nlohmann::ordered_json b; + if (!xsdType) { + // No xsdType, this means that `stringValue` is a plain string literal + // or entity. + b = stringToBinding(stringValue); + } else { + b["value"] = stringValue; + b["type"] = "literal"; + b["datatype"] = xsdType; + } + binding[column->variable_] = std::move(b); } - binding[column->variable_] = std::move(b); + bindings.emplace_back(std::move(binding)); + cancellationHandle->throwIfCancelled(); } - bindings.emplace_back(std::move(binding)); - cancellationHandle->throwIfCancelled(); } resultJson["results"]["bindings"] = std::move(bindings); return resultJson; @@ -450,15 +462,17 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { - for (const auto& columnIndex : selectedColumnIndices) { - if (columnIndex.has_value()) { - co_yield std::string_view{reinterpret_cast(&idTable( - i, columnIndex.value().columnIndex_)), - sizeof(Id)}; + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t i : range) { + for (const auto& columnIndex : selectedColumnIndices) { + if (columnIndex.has_value()) { + co_yield std::string_view{reinterpret_cast(&idTable( + i, columnIndex.value().columnIndex_)), + sizeof(Id)}; + } } + cancellationHandle->throwIfCancelled(); } - cancellationHandle->throwIfCancelled(); } co_return; } @@ -478,22 +492,24 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? 
RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { - for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { - if (selectedColumnIndices[j].has_value()) { - const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); - auto optionalStringAndType = - idToStringAndType( - qet.getQec()->getIndex(), id, result->localVocab(), - escapeFunction); - if (optionalStringAndType.has_value()) [[likely]] { - co_yield optionalStringAndType.value().first; + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t i : range) { + for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { + if (selectedColumnIndices[j].has_value()) { + const auto& val = selectedColumnIndices[j].value(); + Id id = idTable(i, val.columnIndex_); + auto optionalStringAndType = + idToStringAndType( + qet.getQec()->getIndex(), id, result->localVocab(), + escapeFunction); + if (optionalStringAndType.has_value()) [[likely]] { + co_yield optionalStringAndType.value().first; + } } + co_yield j + 1 < selectedColumnIndices.size() ? separator : '\n'; } - co_yield j + 1 < selectedColumnIndices.size() ? separator : '\n'; + cancellationHandle->throwIfCancelled(); } - cancellationHandle->throwIfCancelled(); } LOG(DEBUG) << "Done creating readable result.\n"; } @@ -601,18 +617,20 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. 
- for (auto [i, idTable] : getRowIndices(limitAndOffset, *result)) { - co_yield "\n "; - for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { - if (selectedColumnIndices[j].has_value()) { - const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); - co_yield idToXMLBinding(val.variable_, id, qet.getQec()->getIndex(), - result->localVocab()); + for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (size_t i : range) { + co_yield "\n "; + for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { + if (selectedColumnIndices[j].has_value()) { + const auto& val = selectedColumnIndices[j].value(); + Id id = idTable(i, val.columnIndex_); + co_yield idToXMLBinding(val.variable_, id, qet.getQec()->getIndex(), + result->localVocab()); + } } + co_yield "\n "; + cancellationHandle->throwIfCancelled(); } - co_yield "\n "; - cancellationHandle->throwIfCancelled(); } co_yield "\n"; co_yield "\n"; diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index a85e35e546..b02bef7a98 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -178,16 +178,16 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); - struct IndexWithTable { - size_t index_; + struct TableWithRange { const IdTable& idTable_; + std::ranges::iota_view view_; }; static cppcoro::generator getIdTables(const Result& result); // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. 
It takes into // account the LIMIT, the OFFSET, and the actual size of the `idTable` - static cppcoro::generator getRowIndices( + static cppcoro::generator getRowIndices( LimitOffsetClause limitOffset, const Result& result); FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator); diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 5629137de6..4df58591e7 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1143,7 +1143,14 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); + + auto range = iterator->view_; + auto rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1173,11 +1180,18 @@ TEST(ExportQueryExecutionTrees, auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(5)); + auto range = iterator->view_; + auto rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); + + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(5)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1207,15 +1221,22 @@ TEST(ExportQueryExecutionTrees, auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - 
EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(1)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); + auto range = iterator->view_; + auto rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(1)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); + + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1245,15 +1266,29 @@ TEST(ExportQueryExecutionTrees, auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(2)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); + auto range = iterator->view_; + auto rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); + + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); + + ++rangeIterator; + ASSERT_EQ(rangeIterator, range.end()); ++iterator; ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); + + range = iterator->view_; + rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); 
++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1291,23 +1326,44 @@ TEST(ExportQueryExecutionTrees, auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(3)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(4)); + auto range = iterator->view_; + auto rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(5)); + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(6)); + + range = iterator->view_; + rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); + + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(5)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(iterator->idTable_.at(iterator->index_)[0], Id::makeFromInt(7)); + + range = iterator->view_; + rangeIterator = range.begin(); + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(6)); + + ++rangeIterator; + ASSERT_NE(rangeIterator, range.end()); + EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(7)); + + ++rangeIterator; + EXPECT_EQ(rangeIterator, range.end()); ++iterator; EXPECT_EQ(iterator, generator.end()); From b390f4faca2f1f56499221924ee8cf4213779add Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:21:35 +0200 Subject: 
[PATCH 105/133] Use higher precision timing for `RuntimeInformation` --- src/engine/Operation.cpp | 41 +++++++++++++++++++++++----------------- src/engine/Result.cpp | 12 ++++++------ src/engine/Result.h | 4 ++-- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index bd38ac1d7e..3e333709c1 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -102,19 +102,23 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, timer.msecs(), std::nullopt); } else { runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; - result.runOnNewChunkComputed([this, isRoot]( - const IdTable& idTable, - std::chrono::milliseconds duration) { - runtimeInfo().totalTime_ += duration; - runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); - runtimeInfo().numRows_ += idTable.numRows(); - runtimeInfo().numCols_ = idTable.numColumns(); - LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() - << " x " << idTable.numColumns() << std::endl; - if (isRoot) { - signalQueryUpdate(); - } - }); + result.runOnNewChunkComputed( + [this, isRoot, counter = 0us]( + const IdTable& idTable, + std::chrono::microseconds duration) mutable { + counter += duration; + runtimeInfo().totalTime_ = + std::chrono::duration_cast(counter); + runtimeInfo().originalOperationTime_ = + runtimeInfo().getOperationTime(); + runtimeInfo().numRows_ += idTable.numRows(); + runtimeInfo().numCols_ = idTable.numColumns(); + LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() + << " x " << idTable.numColumns() << std::endl; + if (isRoot) { + signalQueryUpdate(); + } + }); } // Apply LIMIT and OFFSET, but only if the call to `computeResult` did not // already perform it. An example for an operation that directly computes @@ -126,10 +130,13 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // limits and offsets. 
if (!supportsLimit()) { runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); - result.applyLimitOffset(_limit, [runtimeInfo = getRuntimeInfoPointer()]( - std::chrono::milliseconds limitTime) { - runtimeInfo->totalTime_ += limitTime; - }); + result.applyLimitOffset( + _limit, [runtimeInfo = getRuntimeInfoPointer(), + counter = 0us](std::chrono::microseconds limitTime) mutable { + counter += limitTime; + runtimeInfo->totalTime_ = + std::chrono::duration_cast(counter); + }); } else { result.enforceLimitOffset(_limit); } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 921e1703a6..af123423f9 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -92,7 +92,7 @@ void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { // _____________________________________________________________________________ void Result::applyLimitOffset( const LimitOffsetClause& limitOffset, - std::function limitTimeCallback) { + std::function limitTimeCallback) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). 
@@ -104,7 +104,7 @@ void Result::applyLimitOffset( } else { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset, - std::function limitTimeCallback) + std::function limitTimeCallback) -> cppcoro::generator { if (limitOffset._limit.value_or(1) == 0) { co_return; @@ -119,7 +119,7 @@ void Result::applyLimitOffset( limitOffset._limit.value() -= limitOffset.actualSize(originalSize - offsetDelta); } - limitTimeCallback(limitTimer.msecs()); + limitTimeCallback(limitTimer.value()); if (limitOffset._offset == 0) { co_yield std::move(idTable); } @@ -207,15 +207,15 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { // _____________________________________________________________________________ void Result::runOnNewChunkComputed( - std::function function) { + std::function function) { AD_CONTRACT_CHECK(!isDataEvaluated()); auto generator = [](cppcoro::generator original, - std::function + std::function function) -> cppcoro::generator { ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; for (auto&& idTable : original) { - function(idTable, timer.msecs()); + function(idTable, timer.value()); co_yield std::move(idTable); timer.start(); } diff --git a/src/engine/Result.h b/src/engine/Result.h index c7f58ad636..c79aba190b 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -108,7 +108,7 @@ class Result { Result& operator=(Result&& other) = default; void runOnNewChunkComputed( - std::function function); + std::function function); void cacheDuringConsumption( std::function&, const IdTable&)> @@ -181,7 +181,7 @@ class Result { // those are still correct after performing this operation. 
void applyLimitOffset( const LimitOffsetClause& limitOffset, - std::function limitTimeCallback); + std::function limitTimeCallback); void enforceLimitOffset(const LimitOffsetClause& limitOffset); From ce62baad9744c0841dd5b593a23e364d00a38115 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:12:32 +0200 Subject: [PATCH 106/133] Incorporate PR comments for `CacheableGenerator` and `ConcurrentCache` --- src/util/CacheableGenerator.h | 15 ++++++++++----- src/util/ConcurrentCache.h | 33 +++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 9111dafbf5..2b0266b058 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -11,6 +11,11 @@ namespace ad_utility { +// Wrap the given `generator` inside another generator that aggregates a cache +// by calling `aggregator` on every iteration of the inner `generator` until it +// returns false. If the `aggregator` returns false, the cached value is +// discarded. If the cached value is still present once the generator is fully +// consumed, `onFullyCached` is called with the cached value. 
template cppcoro::generator wrapGeneratorWithCache( cppcoro::generator generator, @@ -18,15 +23,15 @@ cppcoro::generator wrapGeneratorWithCache( aggregator, InvocableWithExactReturnType auto onFullyCached) { std::optional aggregatedData{}; - bool aggregate = true; + bool shouldBeAggregated = true; for (auto&& element : generator) { - if (aggregate) { - aggregate = aggregator(aggregatedData, element); - if (!aggregate) { + if (shouldBeAggregated) { + shouldBeAggregated = aggregator(aggregatedData, element); + if (!shouldBeAggregated) { aggregatedData.reset(); } } - co_yield AD_FWD(element); + co_yield std::move(element); } if (aggregatedData.has_value()) { onFullyCached(std::move(aggregatedData).value()); diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 68703b1f6a..2f22efde8c 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -180,9 +180,9 @@ class ConcurrentCache { * @param onlyReadFromCache If true, then the result will only be returned if * it is contained in the cache. Otherwise `nullptr` with a cache status of * `notInCacheNotComputed` will be returned. - * @return A shared_ptr to the computation result. - * @param suitedForCache Predicate function that will be applied to newly - * computed value to check if it is suited for caching. + * @param suitableForCache Predicate function that will be applied to newly + * computed value to check if it is suitable for caching. Only if it returns + * true the result will be cached. * @return A `ResultAndCacheStatus` shared_ptr to the computation result. 
* */ @@ -191,9 +191,9 @@ class ConcurrentCache { const InvocableWithConvertibleReturnType auto& computeFunction, bool onlyReadFromCache, const InvocableWithConvertibleReturnType auto& - suitedForCache) { + suitableForCache) { return computeOnceImpl(false, key, computeFunction, onlyReadFromCache, - suitedForCache); + suitableForCache); } /// Similar to computeOnce, with the following addition: After the call @@ -208,15 +208,19 @@ class ConcurrentCache { suitedForCache); } + // Insert `value` into the cache, if the `key` is not already present. In case + // `pinned` is true and the key is already present, the existing value is + // pinned in case it is not pinned yet. void tryInsertIfNotPresent(bool pinned, const Key& key, std::shared_ptr value) { auto lockPtr = _cacheAndInProgressMap.wlock(); + auto& cache = lockPtr->_cache; if (pinned) { - if (!lockPtr->_cache.containsAndMakePinnedIfExists(key)) { - lockPtr->_cache.insertPinned(key, std::move(value)); + if (!cache.containsAndMakePinnedIfExists(key)) { + cache.insertPinned(key, std::move(value)); } - } else if (!lockPtr->_cache.contains(key)) { - lockPtr->_cache.insert(key, std::move(value)); + } else if (!cache.contains(key)) { + cache.insert(key, std::move(value)); } } @@ -340,7 +344,7 @@ class ConcurrentCache { const InvocableWithConvertibleReturnType auto& computeFunction, bool onlyReadFromCache, const InvocableWithConvertibleReturnType auto& - suitedForCache) { + suitableForCache) { using std::make_shared; bool mustCompute; shared_ptr resultInProgress; @@ -383,11 +387,12 @@ class ConcurrentCache { try { // The actual computation shared_ptr result = make_shared(computeFunction()); - if (suitedForCache(*result)) { + if (suitableForCache(*result)) { moveFromInProgressToCache(key, result); // Signal other threads who are waiting for the results. 
resultInProgress->finish(result); } else { + AD_CONTRACT_CHECK(!pinned); _cacheAndInProgressMap.wlock()->_inProgress.erase(key); resultInProgress->finish(nullptr); } @@ -408,7 +413,11 @@ class ConcurrentCache { if (!resultPointer) { // Fallback computation auto mutablePointer = make_shared(computeFunction()); - tryInsertIfNotPresent(pinned, key, mutablePointer); + if (suitableForCache(*mutablePointer)) { + tryInsertIfNotPresent(pinned, key, mutablePointer); + } else { + AD_CONTRACT_CHECK(!pinned); + } resultPointer = std::move(mutablePointer); } return {std::move(resultPointer), CacheStatus::computed}; From 9755da865cc56ad633c7e85b9a7a9a14885dfddf Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:38:26 +0200 Subject: [PATCH 107/133] Address more PR comments --- src/engine/Operation.cpp | 44 ++++++++++++++++++++-------------------- src/engine/Result.cpp | 5 +---- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 3e333709c1..37378ede19 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -103,9 +103,8 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, } else { runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; result.runOnNewChunkComputed( - [this, isRoot, counter = 0us]( - const IdTable& idTable, - std::chrono::microseconds duration) mutable { + [this, counter = 0us](const IdTable& idTable, + std::chrono::microseconds duration) mutable { counter += duration; runtimeInfo().totalTime_ = std::chrono::duration_cast(counter); @@ -115,9 +114,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, runtimeInfo().numCols_ = idTable.numColumns(); LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() << " x " << idTable.numColumns() << std::endl; - if (isRoot) { - signalQueryUpdate(); - } + signalQueryUpdate(); }); } // Apply LIMIT and OFFSET, but only if the 
call to `computeResult` did not @@ -149,22 +146,25 @@ CacheValue Operation::runComputationAndPrepareForCache( const std::string& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); auto result = runComputation(timer, computationMode, isRoot); - result.cacheDuringConsumption( - [&cache](const std::optional& currentIdTable, - const IdTable& newIdTable) { - auto currentSize = currentIdTable.has_value() - ? CacheValue::getSize(currentIdTable.value()) - : 0_B; - return cache.getMaxSizeSingleEntry() >= - currentSize + CacheValue::getSize(newIdTable); - }, - [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey, - pinned](Result aggregatedResult) { - cache.tryInsertIfNotPresent( - pinned, cacheKey, - std::make_shared(std::move(aggregatedResult), - *runtimeInfo)); - }); + if (!result.isDataEvaluated()) { + AD_CONTRACT_CHECK(!pinned); + result.cacheDuringConsumption( + [maxSize = cache.getMaxSizeSingleEntry()]( + const std::optional& currentIdTable, + const IdTable& newIdTable) { + auto currentSize = currentIdTable.has_value() + ? 
CacheValue::getSize(currentIdTable.value()) + : 0_B; + return maxSize >= currentSize + CacheValue::getSize(newIdTable); + }, + [runtimeInfo = getRuntimeInfoPointer(), &cache, + cacheKey](Result aggregatedResult) { + cache.tryInsertIfNotPresent( + false, cacheKey, + std::make_shared(std::move(aggregatedResult), + *runtimeInfo)); + }); + } if (result.isDataEvaluated()) { auto resultNumRows = result.idTable().size(); auto resultNumCols = result.idTable().numColumns(); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index af123423f9..26d18d8d33 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -264,10 +264,7 @@ void Result::cacheDuringConsumption( std::function&, const IdTable&)> fitInCache, std::function storeInCache) { - if (isDataEvaluated()) { - return; - } - + AD_CONTRACT_CHECK(!isDataEvaluated()); data_ = ad_utility::wrapGeneratorWithCache( std::move(idTables()), [fitInCache = std::move(fitInCache)](std::optional& aggregate, From c0ef64b06587248fc7ae3f21c8be895fb92dac72 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:44:06 +0200 Subject: [PATCH 108/133] Fix unused warnings --- src/engine/Operation.cpp | 12 +++++------- src/engine/Operation.h | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 37378ede19..7c324272c0 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -71,8 +71,7 @@ void Operation::recursivelySetTimeConstraint( // _____________________________________________________________________________ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, - ComputationMode computationMode, - bool isRoot) { + ComputationMode computationMode) { checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); @@ -143,9 +142,9 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // 
_____________________________________________________________________________ CacheValue Operation::runComputationAndPrepareForCache( const ad_utility::Timer& timer, ComputationMode computationMode, - const std::string& cacheKey, bool pinned, bool isRoot) { + const std::string& cacheKey, bool pinned) { auto& cache = _executionContext->getQueryTreeCache(); - auto result = runComputation(timer, computationMode, isRoot); + auto result = runComputation(timer, computationMode); if (!result.isDataEvaluated()) { AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( @@ -207,10 +206,9 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult, - isRoot]() { + auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult]() { return runComputationAndPrepareForCache(timer, computationMode, cacheKey, - pinResult, isRoot); + pinResult); }; auto suitedForCache = [](const CacheValue& cacheValue) { diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 5400506294..f9989210d2 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -261,12 +261,12 @@ class Operation { virtual ProtoResult computeResult(bool requestLaziness) = 0; ProtoResult runComputation(const ad_utility::Timer& timer, - ComputationMode computationMode, bool isRoot); + ComputationMode computationMode); CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, - bool pinned, bool isRoot); + bool pinned); // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. 
From 44d103f2e5e4fde5426a922e6b12cdf3d15998b4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 18:03:53 +0200 Subject: [PATCH 109/133] Fix flickering timing information --- src/engine/Operation.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 7c324272c0..4c0ce53daf 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -102,11 +102,12 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, } else { runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; result.runOnNewChunkComputed( - [this, counter = 0us](const IdTable& idTable, + [this, overlap = 0us](const IdTable& idTable, std::chrono::microseconds duration) mutable { - counter += duration; - runtimeInfo().totalTime_ = - std::chrono::duration_cast(counter); + overlap += duration; + runtimeInfo().totalTime_ += + std::chrono::duration_cast(overlap); + overlap -= runtimeInfo().totalTime_; runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); runtimeInfo().numRows_ += idTable.numRows(); @@ -128,10 +129,12 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); result.applyLimitOffset( _limit, [runtimeInfo = getRuntimeInfoPointer(), - counter = 0us](std::chrono::microseconds limitTime) mutable { - counter += limitTime; + overlap = 0us](std::chrono::microseconds limitTime) mutable { + overlap += limitTime; runtimeInfo->totalTime_ = - std::chrono::duration_cast(counter); + std::chrono::duration_cast(overlap); + overlap -= runtimeInfo->totalTime_; + runtimeInfo->originalOperationTime_ = runtimeInfo->getOperationTime(); }); } else { result.enforceLimitOffset(_limit); From 4a3f09ba06702d83fe27eed87fa91859e2cfbd1f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 
18:14:00 +0200 Subject: [PATCH 110/133] Actually fix the timing issue --- src/engine/Operation.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 4c0ce53daf..acc78ed1eb 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -105,9 +105,10 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, [this, overlap = 0us](const IdTable& idTable, std::chrono::microseconds duration) mutable { overlap += duration; - runtimeInfo().totalTime_ += + auto msPrecision = std::chrono::duration_cast(overlap); - overlap -= runtimeInfo().totalTime_; + runtimeInfo().totalTime_ += msPrecision; + overlap -= msPrecision; runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); runtimeInfo().numRows_ += idTable.numRows(); @@ -131,9 +132,10 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, _limit, [runtimeInfo = getRuntimeInfoPointer(), overlap = 0us](std::chrono::microseconds limitTime) mutable { overlap += limitTime; - runtimeInfo->totalTime_ = + auto msPrecision = std::chrono::duration_cast(overlap); - overlap -= runtimeInfo->totalTime_; + runtimeInfo->totalTime_ += msPrecision; + overlap -= msPrecision; runtimeInfo->originalOperationTime_ = runtimeInfo->getOperationTime(); }); } else { From 0164d9c9c6dd29e792b637f744509f6dfb427e0e Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:58:22 +0200 Subject: [PATCH 111/133] Small improvements to query updates --- src/engine/ExportQueryExecutionTrees.cpp | 8 ++++--- src/engine/Operation.cpp | 15 ++++++++++-- src/engine/Result.cpp | 30 +++++++++++++++++------- src/engine/Result.h | 3 ++- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 02f4cdf6c0..e1650b64ac 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ 
b/src/engine/ExportQueryExecutionTrees.cpp @@ -38,7 +38,8 @@ ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); if (currentOffset != upperBound) { - co_yield {idTable, std::views::iota(currentOffset, upperBound)}; + co_yield TableWithRange{idTable, + std::views::iota(currentOffset, upperBound)}; } limitOffset._offset -= currentOffset; if (limitOffset._limit.has_value()) { @@ -677,8 +678,9 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( const ad_utility::Timer& requestTimer, CancellationHandle cancellationHandle) { auto timeUntilFunctionCall = requestTimer.msecs(); - std::shared_ptr result = - qet.getResult(query._limitOffset._limit.has_value()); + // Always request lazy if possible, the lower memory footprint outvalues the + // potential overhead of generators. + std::shared_ptr result = qet.getResult(true); result->logResultSize(); nlohmann::json j; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index acc78ed1eb..87c5bdc514 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -102,9 +102,11 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, } else { runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; result.runOnNewChunkComputed( - [this, overlap = 0us](const IdTable& idTable, - std::chrono::microseconds duration) mutable { + [this, overlap = 0us, timeSizeUpdate = 0us]( + const IdTable& idTable, + std::chrono::microseconds duration) mutable { overlap += duration; + timeSizeUpdate += duration; auto msPrecision = std::chrono::duration_cast(overlap); runtimeInfo().totalTime_ += msPrecision; @@ -115,6 +117,15 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, runtimeInfo().numCols_ = idTable.numColumns(); LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() << " x " << 
idTable.numColumns() << std::endl; + if (timeSizeUpdate > 50ms) { + timeSizeUpdate = 0us; + signalQueryUpdate(); + } + }, + [this](bool failed) { + if (failed) { + runtimeInfo().status_ = RuntimeInformation::failed; + } signalQueryUpdate(); }); } diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 26d18d8d33..078279399e 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -207,19 +207,33 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { // _____________________________________________________________________________ void Result::runOnNewChunkComputed( - std::function function) { + std::function onNewChunk, + std::function onGeneratorFinished) { AD_CONTRACT_CHECK(!isDataEvaluated()); auto generator = [](cppcoro::generator original, std::function - function) -> cppcoro::generator { - ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; - for (auto&& idTable : original) { - function(idTable, timer.value()); - co_yield std::move(idTable); - timer.start(); + onNewChunk, + std::function onGeneratorFinished) + -> cppcoro::generator { + // Call this within destructor to make sure it is also called when an + // operation stops iterating before reaching the end. + absl::Cleanup cleanup{ + [&onGeneratorFinished]() { onGeneratorFinished(false); }}; + try { + ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; + for (auto&& idTable : original) { + onNewChunk(idTable, timer.value()); + co_yield std::move(idTable); + timer.start(); + } + } catch (...) 
{ + std::move(cleanup).Cancel(); + onGeneratorFinished(true); + throw; } - }(std::move(idTables()), std::move(function)); + }(std::move(idTables()), std::move(onNewChunk), + std::move(onGeneratorFinished)); data_ = std::move(generator); } diff --git a/src/engine/Result.h b/src/engine/Result.h index c79aba190b..11470cd20f 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -108,7 +108,8 @@ class Result { Result& operator=(Result&& other) = default; void runOnNewChunkComputed( - std::function function); + std::function onNewChunk, + std::function onGeneratorFinished); void cacheDuringConsumption( std::function&, const IdTable&)> From f23849c7bae6f84512f92b0400b5d0168a65c64b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:57:12 +0200 Subject: [PATCH 112/133] Add a lot of documentation --- src/engine/ExportQueryExecutionTrees.h | 4 +++ src/engine/Filter.h | 4 +++ src/engine/Operation.cpp | 1 + src/engine/Operation.h | 6 ++++ src/engine/Result.h | 45 +++++++++++++++++++++++--- 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index b02bef7a98..4ad93c9e5b 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -178,12 +178,16 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + // Helper type that contains an `IdTable` and a view with related indices to + // access the `IdTable` with. struct TableWithRange { const IdTable& idTable_; std::ranges::iota_view view_; }; + // Yield all `IdTables` provided by the given `result`. static cppcoro::generator getIdTables(const Result& result); + // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. 
It takes into // account the LIMIT, the OFFSET, and the actual size of the `idTable` diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 6a3ef107c7..ce061faf58 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -60,10 +60,14 @@ class Filter : public Operation { ProtoResult computeResult(bool requestLaziness) override; + // Perform the actual filter operation of the data provided by + // `evaluationContext`. template IdTable computeFilterImpl( sparqlExpression::EvaluationContext& evaluationContext); + // Run `computeFilterImpl` once for every `IdTable` yielded by `subResult` if + // it is lazily evaluated. cppcoro::generator filterInChunks( std::shared_ptr subRes); }; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 87c5bdc514..50ae495b9a 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -72,6 +72,7 @@ void Operation::recursivelySetTimeConstraint( // _____________________________________________________________________________ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, ComputationMode computationMode) { + AD_CONTRACT_CHECK(computationMode != ComputationMode::ONLY_IF_CACHED); checkCancellation(); runtimeInfo().status_ = RuntimeInformation::Status::inProgress; signalQueryUpdate(); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index f9989210d2..880151eaeb 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -260,9 +260,15 @@ class Operation { //! Compute the result of the query-subtree rooted at this element.. virtual ProtoResult computeResult(bool requestLaziness) = 0; + // Perform the expensive computation modeled by the subclass of this + // `Operation`. The value provided by `computationMode` decides if lazy + // results are preferred. It must not be `ONLY_IF_CACHED`, this will lead to + // an `ad_utility::Exception`. 
ProtoResult runComputation(const ad_utility::Timer& timer, ComputationMode computationMode); + // Call `runComputationAndPrepareForCache` and transform it into a value that + // could be inserted into the cache. CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, diff --git a/src/engine/Result.h b/src/engine/Result.h index 11470cd20f..7b5cb314b6 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -107,19 +107,41 @@ class Result { Result(Result&& other) = default; Result& operator=(Result&& other) = default; + // Wrap the generator stored in `data_` within a new generator that calls + // `onNewChunk` every time a new `IdTable` is yielded by the original + // generator and passed this new `IdTable` along with microsecond precision + // timing information on how long it took to compute this new chunk. + // `onGeneratorFinished` is guaranteed to be called eventually as long as the + // generator is consumed at least partially, with `true` if an exception + // occured during consumption or with `false` when the generator is done + // processing or abandoned and destroyed. + // + // Throw an `ad_utility::Exception` if the underlying `data_` member holds the + // wrong variant. void runOnNewChunkComputed( std::function onNewChunk, std::function onGeneratorFinished); + // Wrap the generator stored in `data_` within a new generator that aggregates + // the entries yielded by the generator into a cacheable `IdTable`. Once + // `fitInCache` returns false, thus indicating that both passed arguments + // together would be too large to be cached, this cached value is discarded. + // If this cached value still exists when the generator is fully consumed a + // new `Result` is created with this value and passed to `storeInCache`. + // + // Throw an `ad_utility::Exception` if the underlying `data_` member holds the + // wrong variant. 
void cacheDuringConsumption( std::function&, const IdTable&)> fitInCache, std::function storeInCache); - // Const access to the underlying `IdTable`. + // Const access to the underlying `IdTable`. Throw an `ad_utility::Exception` + // if the underlying `data_` member holds the wrong variant. const IdTable& idTable() const; - // Access to the underlying `IdTable`s. + // Access to the underlying `IdTable`s. Throw an `ad_utility::Exception` + // if the underlying `data_` member holds the wrong variant. cppcoro::generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. @@ -165,6 +187,7 @@ class Result { // (which is not possible with `shareLocalVocabFrom`). LocalVocab getCopyOfLocalVocab() const; + // Return true if `data_` holds an `IdTable`, false otherwise. bool isDataEvaluated() const noexcept; // Log the size of this result. We call this at several places in @@ -177,19 +200,31 @@ class Result { string asDebugString() const; // Apply the `limitOffset` clause by shifting and then resizing the `IdTable`. - // Note: If additional members and invariants are added to the class (for + // This also applies if `data_` holds a generator yielding `IdTable`s, where + // this is applied respectively. + // `limitTimeCallback` is called whenever an `IdTable` is resized with the + // number of microseconds it took to perform this operation. + // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those are still correct after performing this operation. void applyLimitOffset( const LimitOffsetClause& limitOffset, std::function limitTimeCallback); + // Check if the operation did fulfill its contract and only returns as many + // elements as requested by the provided `limitOffset`. Throw an + // `ad_utility::Exception` otherwise. When `data_` holds a generator, this + // behaviour applies analogously when consuming the generator. 
+ // This member function provides an alternative to `applyLimitOffset` that + // resizes the result if the operation doesn't support this on its own. void enforceLimitOffset(const LimitOffsetClause& limitOffset); // Check that if the `varColMap` guarantees that a column is always defined // (i.e. that is contains no single undefined value) that there are indeed no - // undefined values in the `data_` of this result. Return `true` iff the - // check is successful. + // undefined values in the `data_` of this result. Do nothing iff the check is + // successful. Throw an `ad_utility::Exception` otherwise. When `data_` holds + // a generator, this behaviour applies analogously when consuming the + // generator. void checkDefinedness(const VariableToColumnMap& varColMap); }; From 3c69ebf5b7b5c69341303e83f4fe2d65d5ee30cb Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 22:23:43 +0200 Subject: [PATCH 113/133] Rename some functions --- src/engine/ExportQueryExecutionTrees.cpp | 2 +- src/engine/Filter.cpp | 2 +- src/engine/Operation.cpp | 16 ++++++------- src/engine/QueryExecutionTree.cpp | 4 ++-- src/engine/Result.cpp | 30 ++++++++++++------------ src/engine/Result.h | 4 ++-- test/FilterTest.cpp | 4 ++-- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index e1650b64ac..2a5377d880 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -16,7 +16,7 @@ cppcoro::generator ExportQueryExecutionTrees::getIdTables( const Result& result) { - if (result.isDataEvaluated()) { + if (result.isFullyMaterialized()) { co_yield result.idTable(); } else { for (const IdTable& idTable : result.idTables()) { diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 93be9d8a05..1055e2df93 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -49,7 +49,7 @@ ProtoResult 
Filter::computeResult(bool requestLaziness) { LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); - if (subRes->isDataEvaluated()) { + if (subRes->isFullyMaterialized()) { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), subRes->idTable(), getExecutionContext()->getAllocator(), diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 50ae495b9a..e35938ac24 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -79,7 +79,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, ProtoResult result = computeResult(computationMode == ComputationMode::LAZY_IF_SUPPORTED); AD_CONTRACT_CHECK(computationMode == ComputationMode::LAZY_IF_SUPPORTED || - result.isDataEvaluated()); + result.isFullyMaterialized()); checkCancellation(); if constexpr (ad_utility::areExpensiveChecksEnabled) { @@ -96,7 +96,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // correct runtimeInfo. The children of the runtime info are already set // correctly because the result was computed, so we can pass `nullopt` as // the last argument. 
- if (result.isDataEvaluated()) { + if (result.isFullyMaterialized()) { updateRuntimeInformationOnSuccess(result.idTable().size(), ad_utility::CacheStatus::computed, timer.msecs(), std::nullopt); @@ -151,7 +151,7 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, runtimeInfo->originalOperationTime_ = runtimeInfo->getOperationTime(); }); } else { - result.enforceLimitOffset(_limit); + result.assertThatLimitWasRespected(_limit); } return result; } @@ -162,7 +162,7 @@ CacheValue Operation::runComputationAndPrepareForCache( const std::string& cacheKey, bool pinned) { auto& cache = _executionContext->getQueryTreeCache(); auto result = runComputation(timer, computationMode); - if (!result.isDataEvaluated()) { + if (!result.isFullyMaterialized()) { AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( [maxSize = cache.getMaxSizeSingleEntry()]( @@ -181,7 +181,7 @@ CacheValue Operation::runComputationAndPrepareForCache( *runtimeInfo)); }); } - if (result.isDataEvaluated()) { + if (result.isFullyMaterialized()) { auto resultNumRows = result.idTable().size(); auto resultNumCols = result.idTable().numColumns(); LOG(DEBUG) << "Computed result of size " << resultNumRows << " x " @@ -229,7 +229,7 @@ std::shared_ptr Operation::getResult( }; auto suitedForCache = [](const CacheValue& cacheValue) { - return cacheValue.resultTable().isDataEvaluated(); + return cacheValue.resultTable().isFullyMaterialized(); }; bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; @@ -245,7 +245,7 @@ std::shared_ptr Operation::getResult( return nullptr; } - if (result._resultPointer->resultTable().isDataEvaluated()) { + if (result._resultPointer->resultTable().isFullyMaterialized()) { updateRuntimeInformationOnSuccess(result, timer.msecs()); } @@ -337,7 +337,7 @@ void Operation::updateRuntimeInformationOnSuccess( const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { const auto& result = 
resultAndCacheStatus._resultPointer->resultTable(); - AD_CONTRACT_CHECK(result.isDataEvaluated()); + AD_CONTRACT_CHECK(result.isFullyMaterialized()); updateRuntimeInformationOnSuccess( result.idTable().size(), resultAndCacheStatus._cacheStatus, duration, resultAndCacheStatus._resultPointer->runtimeInfo()); diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 12392aeb6e..aed58d4fde 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -84,7 +84,7 @@ size_t QueryExecutionTree::getCostEstimate() { size_t QueryExecutionTree::getSizeEstimate() { if (!sizeEstimate_.has_value()) { if (cachedResult_) { - AD_CORRECTNESS_CHECK(cachedResult_->isDataEvaluated()); + AD_CORRECTNESS_CHECK(cachedResult_->isFullyMaterialized()); sizeEstimate_ = cachedResult_->idTable().size(); } else { // if we are in a unit test setting and there is no QueryExecutionContest @@ -99,7 +99,7 @@ size_t QueryExecutionTree::getSizeEstimate() { // _____________________________________________________________________________ bool QueryExecutionTree::knownEmptyResult() { if (cachedResult_) { - AD_CORRECTNESS_CHECK(cachedResult_->isDataEvaluated()); + AD_CORRECTNESS_CHECK(cachedResult_->isFullyMaterialized()); return cachedResult_->idTable().size() == 0; } return rootOperation_->knownEmptyResult(); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 078279399e..9c7d8dcf23 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -55,8 +55,7 @@ Result::Result(IdTable idTable, std::vector sortedBy, Result::Result(cppcoro::generator idTables, std::vector sortedBy, SharedLocalVocabWrapper localVocab) - : data_{[](auto idTables, - auto sortedBy) mutable -> cppcoro::generator { + : data_{[](auto idTables, auto sortedBy) -> cppcoro::generator { for (IdTable& idTable : idTables) { validateIdTable(idTable, sortedBy); co_yield std::move(idTable); @@ -74,7 +73,8 @@ Result::Result(cppcoro::generator idTables, 
SharedLocalVocabWrapper{std::move(localVocab)}} {} // _____________________________________________________________________________ -void modifyIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { +// Apply `LimitOffsetClause` to given `IdTable`. +void resizeIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { std::ranges::for_each( idTable.getColumns(), [offset = limitOffset.actualOffset(idTable.numRows()), @@ -97,9 +97,9 @@ void Result::applyLimitOffset( // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). AD_CONTRACT_CHECK(limitTimeCallback); - if (isDataEvaluated()) { + if (isFullyMaterialized()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - modifyIdTable(std::get(data_), limitOffset); + resizeIdTable(std::get(data_), limitOffset); limitTimeCallback(limitTimer.msecs()); } else { auto generator = @@ -112,7 +112,7 @@ void Result::applyLimitOffset( for (auto&& idTable : original) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; size_t originalSize = idTable.numRows(); - modifyIdTable(idTable, limitOffset); + resizeIdTable(idTable, limitOffset); uint64_t offsetDelta = limitOffset.actualOffset(originalSize); limitOffset._offset -= offsetDelta; if (limitOffset._limit.has_value()) { @@ -133,8 +133,8 @@ void Result::applyLimitOffset( } // _____________________________________________________________________________ -void Result::enforceLimitOffset(const LimitOffsetClause& limitOffset) { - if (isDataEvaluated()) { +void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { + if (isFullyMaterialized()) { uint64_t numRows = idTable().numRows(); auto limit = limitOffset._limit; AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); @@ -184,7 +184,7 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { !hasUndefined; }); }; - if (isDataEvaluated()) { + if 
(isFullyMaterialized()) { AD_EXPENSIVE_CHECK(performCheck(varColMap, std::get(data_))); } else { auto generator = [](cppcoro::generator original, @@ -209,7 +209,7 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { void Result::runOnNewChunkComputed( std::function onNewChunk, std::function onGeneratorFinished) { - AD_CONTRACT_CHECK(!isDataEvaluated()); + AD_CONTRACT_CHECK(!isFullyMaterialized()); auto generator = [](cppcoro::generator original, std::function @@ -258,18 +258,18 @@ void Result::validateIdTable(const IdTable& idTable, // _____________________________________________________________________________ const IdTable& Result::idTable() const { - AD_CONTRACT_CHECK(isDataEvaluated()); + AD_CONTRACT_CHECK(isFullyMaterialized()); return std::get(data_); } // _____________________________________________________________________________ cppcoro::generator& Result::idTables() const { - AD_CONTRACT_CHECK(!isDataEvaluated()); + AD_CONTRACT_CHECK(!isFullyMaterialized()); return std::get>(data_); } // _____________________________________________________________________________ -bool Result::isDataEvaluated() const noexcept { +bool Result::isFullyMaterialized() const noexcept { return std::holds_alternative(data_); } @@ -278,7 +278,7 @@ void Result::cacheDuringConsumption( std::function&, const IdTable&)> fitInCache, std::function storeInCache) { - AD_CONTRACT_CHECK(!isDataEvaluated()); + AD_CONTRACT_CHECK(!isFullyMaterialized()); data_ = ad_utility::wrapGeneratorWithCache( std::move(idTables()), [fitInCache = std::move(fitInCache)](std::optional& aggregate, @@ -302,7 +302,7 @@ void Result::cacheDuringConsumption( // _____________________________________________________________________________ void Result::logResultSize() const { - if (isDataEvaluated()) { + if (isFullyMaterialized()) { LOG(INFO) << "Result has size " << idTable().size() << " x " << idTable().numColumns() << std::endl; } else { diff --git a/src/engine/Result.h 
b/src/engine/Result.h index 7b5cb314b6..b265a53765 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -188,7 +188,7 @@ class Result { LocalVocab getCopyOfLocalVocab() const; // Return true if `data_` holds an `IdTable`, false otherwise. - bool isDataEvaluated() const noexcept; + bool isFullyMaterialized() const noexcept; // Log the size of this result. We call this at several places in // `Server::processQuery`. Ideally, this should only be called in one @@ -217,7 +217,7 @@ class Result { // behaviour applies analogously when consuming the generator. // This member function provides an alternative to `applyLimitOffset` that // resizes the result if the operation doesn't support this on its own. - void enforceLimitOffset(const LimitOffsetClause& limitOffset); + void assertThatLimitWasRespected(const LimitOffsetClause& limitOffset); // Check that if the `varColMap` guarantees that a column is always defined // (i.e. that is contains no single undefined value) that there are indeed no diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 96fa82c5e5..4e40d68989 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -109,7 +109,7 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { "Expression ?x"}}; auto result = filter.getResult(false, ComputationMode::LAZY_IF_SUPPORTED); - ASSERT_FALSE(result->isDataEvaluated()); + ASSERT_FALSE(result->isFullyMaterialized()); auto& generator = result->idTables(); auto iterator = generator.begin(); @@ -158,7 +158,7 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { "Expression ?x"}}; auto result = filter.getResult(false, ComputationMode::FULLY_MATERIALIZED); - ASSERT_TRUE(result->isDataEvaluated()); + ASSERT_TRUE(result->isFullyMaterialized()); EXPECT_THAT(result->idTable(), ElementsAre(makeRow(true), makeRow(true), makeRow(true), makeRow(true), makeRow(true))); From 383696b6f88779647b7b75ac0f7a24a1b32d659b Mon Sep 17 00:00:00 2001 From: RobinTF 
<83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 22:31:12 +0200 Subject: [PATCH 114/133] Fix spelling error --- src/engine/Result.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/Result.h b/src/engine/Result.h index b265a53765..2c5e0e809e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -113,7 +113,7 @@ class Result { // timing information on how long it took to compute this new chunk. // `onGeneratorFinished` is guaranteed to be called eventually as long as the // generator is consumed at least partially, with `true` if an exception - // occured during consumption or with `false` when the generator is done + // occurred during consumption or with `false` when the generator is done // processing or abandoned and destroyed. // // Throw an `ad_utility::Exception` if the underlying `data_` member holds the From d89e8967ce80378f8361232a1b11168ade1caa92 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:17:03 +0200 Subject: [PATCH 115/133] Fix build on macOS --- src/engine/ExportQueryExecutionTrees.cpp | 15 +++++++-------- src/engine/ExportQueryExecutionTrees.h | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 2a5377d880..4b9cd88e62 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -38,8 +38,7 @@ ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); if (currentOffset != upperBound) { - co_yield TableWithRange{idTable, - std::views::iota(currentOffset, upperBound)}; + co_yield {idTable, std::views::iota(currentOffset, upperBound)}; } limitOffset._offset -= currentOffset; if (limitOffset._limit.has_value()) { @@ -60,7 +59,7 @@ 
ExportQueryExecutionTrees::constructQueryResultToTriples( LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t i : range) { + for (uint64_t i : range) { ConstructQueryExportContext context{i, idTable, result->localVocab(), qet.getVariableColumns(), qet.getQec()->getIndex()}; @@ -145,7 +144,7 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( nlohmann::json json = nlohmann::json::array(); for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t rowIndex : range) { + for (uint64_t rowIndex : range) { // We need the explicit `array` constructor for the special case of zero // variables. json.push_back(nlohmann::json::array()); @@ -387,7 +386,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( }; for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t rowIndex : range) { + for (uint64_t rowIndex : range) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. nlohmann::ordered_json binding; @@ -464,7 +463,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t i : range) { + for (uint64_t i : range) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { co_yield std::string_view{reinterpret_cast(&idTable( @@ -494,7 +493,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( ? 
RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t i : range) { + for (uint64_t i : range) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); @@ -619,7 +618,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { - for (size_t i : range) { + for (uint64_t i : range) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 4ad93c9e5b..4d001338cf 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -182,7 +182,7 @@ class ExportQueryExecutionTrees { // access the `IdTable` with. struct TableWithRange { const IdTable& idTable_; - std::ranges::iota_view view_; + std::ranges::iota_view view_; }; // Yield all `IdTables` provided by the given `result`. 
From b5dd60c501cbd246f3887c43e85bb2e6505fa96c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:29:24 +0200 Subject: [PATCH 116/133] Address even more PR comments --- src/engine/ExportQueryExecutionTrees.cpp | 13 +++--- src/engine/Filter.cpp | 59 +++++++++++------------- src/engine/Filter.h | 7 ++- src/engine/Result.cpp | 52 ++++++++++----------- src/engine/Result.h | 13 ++++-- src/util/CacheableGenerator.h | 4 +- 6 files changed, 74 insertions(+), 74 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 4b9cd88e62..4bda95d994 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -58,7 +58,7 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { - for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { ConstructQueryExportContext context{i, idTable, result->localVocab(), qet.getVariableColumns(), @@ -143,7 +143,7 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( AD_CORRECTNESS_CHECK(result != nullptr); nlohmann::json json = nlohmann::json::array(); - for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t rowIndex : range) { // We need the explicit `array` constructor for the special case of zero // variables. 
@@ -385,7 +385,7 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultToSparqlJSON( return b; }; - for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t rowIndex : range) { // TODO: ordered_json` entries are ordered alphabetically, but insertion // order would be preferable. @@ -462,7 +462,8 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : + getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { @@ -492,7 +493,7 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { @@ -617,7 +618,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. 
- for (auto [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 1055e2df93..9ede82df0f 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -50,46 +50,39 @@ ProtoResult Filter::computeResult(bool requestLaziness) { checkCancellation(); if (subRes->isFullyMaterialized()) { - sparqlExpression::EvaluationContext evaluationContext( - *getExecutionContext(), _subtree->getVariableColumns(), - subRes->idTable(), getExecutionContext()->getAllocator(), - subRes->localVocab(), cancellationHandle_, deadline_); - - // TODO This should be a mandatory argument to the - // EvaluationContext constructor. - evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy(); - - size_t width = evaluationContext._inputTable.numColumns(); - IdTable result = CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, - evaluationContext); + IdTable result = filterIdTable(subRes, subRes->idTable()); LOG(DEBUG) << "Filter result computation done." << endl; - checkCancellation(); return {std::move(result), resultSortedOn(), subRes->getSharedLocalVocab()}; } - return {filterInChunks(subRes), resultSortedOn(), - subRes->getSharedLocalVocab()}; + auto localVocab = subRes->getSharedLocalVocab(); + return {[](auto subRes, auto* self) -> cppcoro::generator { + for (IdTable& idTable : subRes->idTables()) { + IdTable result = self->filterIdTable(subRes, idTable); + LOG(DEBUG) << "Filter result chunk done." 
<< endl; + co_yield result; + } + }(std::move(subRes), this), + resultSortedOn(), std::move(localVocab)}; } // _____________________________________________________________________________ -cppcoro::generator Filter::filterInChunks( - std::shared_ptr subRes) { - for (const IdTable& idTable : subRes->idTables()) { - sparqlExpression::EvaluationContext evaluationContext( - *getExecutionContext(), _subtree->getVariableColumns(), idTable, - getExecutionContext()->getAllocator(), subRes->localVocab(), - cancellationHandle_, deadline_); - - // TODO This should be a mandatory argument to the - // EvaluationContext constructor. - evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy(); - - size_t width = evaluationContext._inputTable.numColumns(); - co_yield CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, - evaluationContext); - LOG(DEBUG) << "Filter result chunk done." << endl; - checkCancellation(); - } +IdTable Filter::filterIdTable(const std::shared_ptr& subRes, + const IdTable& idTable) { + sparqlExpression::EvaluationContext evaluationContext( + *getExecutionContext(), _subtree->getVariableColumns(), idTable, + getExecutionContext()->getAllocator(), subRes->localVocab(), + cancellationHandle_, deadline_); + + // TODO This should be a mandatory argument to the + // EvaluationContext constructor. 
+ evaluationContext._columnsByWhichResultIsSorted = subRes->sortedBy(); + + size_t width = evaluationContext._inputTable.numColumns(); + IdTable result = CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, + evaluationContext); + checkCancellation(); + return result; } // _____________________________________________________________________________ diff --git a/src/engine/Filter.h b/src/engine/Filter.h index ce061faf58..bc34279f77 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -66,8 +66,7 @@ class Filter : public Operation { IdTable computeFilterImpl( sparqlExpression::EvaluationContext& evaluationContext); - // Run `computeFilterImpl` once for every `IdTable` yielded by `subResult` if - // it is lazily evaluated. - cppcoro::generator filterInChunks( - std::shared_ptr subRes); + // Run `computeFilterImpl` on the provided IdTable + IdTable filterIdTable(const std::shared_ptr& subRes, + const IdTable& idTable); }; diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 9c7d8dcf23..0eeca8bfd5 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -55,12 +55,13 @@ Result::Result(IdTable idTable, std::vector sortedBy, Result::Result(cppcoro::generator idTables, std::vector sortedBy, SharedLocalVocabWrapper localVocab) - : data_{[](auto idTables, auto sortedBy) -> cppcoro::generator { - for (IdTable& idTable : idTables) { - validateIdTable(idTable, sortedBy); - co_yield std::move(idTable); - } - }(std::move(idTables), sortedBy)}, + : data_{GenContainer{ + [](auto idTables, auto sortedBy) -> cppcoro::generator { + for (IdTable& idTable : idTables) { + validateIdTable(idTable, sortedBy); + co_yield std::move(idTable); + } + }(std::move(idTables), sortedBy)}}, sortedBy_{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); @@ -109,7 +110,7 @@ void Result::applyLimitOffset( if (limitOffset._limit.value_or(1) == 0) { co_return; } - for (auto&& idTable : original) { + for 
(IdTable& idTable : original) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; size_t originalSize = idTable.numRows(); resizeIdTable(idTable, limitOffset); @@ -121,14 +122,14 @@ void Result::applyLimitOffset( } limitTimeCallback(limitTimer.value()); if (limitOffset._offset == 0) { - co_yield std::move(idTable); + co_yield idTable; } if (limitOffset._limit.value_or(1) == 0) { break; } } }(std::move(idTables()), limitOffset, std::move(limitTimeCallback)); - data_ = std::move(generator); + data_.emplace(std::move(generator)); } } @@ -144,14 +145,14 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { LimitOffsetClause limitOffset) -> cppcoro::generator { auto limit = limitOffset._limit; uint64_t elementCount = 0; - for (auto&& idTable : original) { + for (IdTable& idTable : original) { elementCount += idTable.numRows(); AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); - co_yield std::move(idTable); + co_yield idTable; } AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); }(std::move(idTables()), limitOffset); - data_ = std::move(generator); + data_.emplace(std::move(generator)); } } @@ -190,18 +191,14 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto generator = [](cppcoro::generator original, VariableToColumnMap varColMap, auto performCheck) -> cppcoro::generator { - bool first = true; - for (auto&& idTable : original) { - if (first) { - first = false; - // No need to check subsequent idTables assuming the datatypes - // don't change mid result. - AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable)); - } - co_yield std::move(idTable); + for (IdTable& idTable : original) { + // No need to check subsequent idTables assuming the datatypes + // don't change mid result. 
+ AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable)); + co_yield idTable; } }(std::move(idTables()), varColMap, std::move(performCheck)); - data_ = std::move(generator); + data_.emplace(std::move(generator)); } } @@ -234,7 +231,7 @@ void Result::runOnNewChunkComputed( } }(std::move(idTables()), std::move(onNewChunk), std::move(onGeneratorFinished)); - data_ = std::move(generator); + data_.emplace(std::move(generator)); } // _____________________________________________________________________________ @@ -265,7 +262,10 @@ const IdTable& Result::idTable() const { // _____________________________________________________________________________ cppcoro::generator& Result::idTables() const { AD_CONTRACT_CHECK(!isFullyMaterialized()); - return std::get>(data_); + const auto& container = std::get(data_); + AD_CONTRACT_CHECK(!container.consumed_); + container.consumed_ = true; + return container.generator_; } // _____________________________________________________________________________ @@ -279,7 +279,7 @@ void Result::cacheDuringConsumption( fitInCache, std::function storeInCache) { AD_CONTRACT_CHECK(!isFullyMaterialized()); - data_ = ad_utility::wrapGeneratorWithCache( + data_.emplace(ad_utility::wrapGeneratorWithCache( std::move(idTables()), [fitInCache = std::move(fitInCache)](std::optional& aggregate, const IdTable& newTable) { @@ -297,7 +297,7 @@ void Result::cacheDuringConsumption( localVocab = localVocab_](IdTable idTable) mutable { storeInCache(Result{std::move(idTable), std::move(sortedBy), SharedLocalVocabWrapper{std::move(localVocab)}}); - }); + })); } // _____________________________________________________________________________ diff --git a/src/engine/Result.h b/src/engine/Result.h index 2c5e0e809e..56a08bafe9 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -24,9 +24,16 @@ // evaluated. class Result { private: - using Data = std::variant>; - // The actual entries. Needs to be mutable in order to consume a const entry. 
- mutable Data data_; + // Needs to be mutable in order to be consumable from a const result. + struct GenContainer { + mutable cppcoro::generator generator_; + mutable bool consumed_ = false; + explicit GenContainer(cppcoro::generator generator) + : generator_{std::move(generator)} {} + }; + using Data = std::variant; + // The actual entries. + Data data_; // The column indices by which the result is sorted (primary sort key first). // Empty if the result is not sorted on any column. diff --git a/src/util/CacheableGenerator.h b/src/util/CacheableGenerator.h index 2b0266b058..881dd1c282 100644 --- a/src/util/CacheableGenerator.h +++ b/src/util/CacheableGenerator.h @@ -24,14 +24,14 @@ cppcoro::generator wrapGeneratorWithCache( InvocableWithExactReturnType auto onFullyCached) { std::optional aggregatedData{}; bool shouldBeAggregated = true; - for (auto&& element : generator) { + for (T& element : generator) { if (shouldBeAggregated) { shouldBeAggregated = aggregator(aggregatedData, element); if (!shouldBeAggregated) { aggregatedData.reset(); } } - co_yield std::move(element); + co_yield element; } if (aggregatedData.has_value()) { onFullyCached(std::move(aggregatedData).value()); From 4f713290dbb6548a32264645e7757d6a6147da8b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:55:07 +0200 Subject: [PATCH 117/133] Add Unit tests for edge case --- src/engine/ExportQueryExecutionTrees.h | 2 ++ test/ExportQueryExecutionTreesTest.cpp | 46 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 4d001338cf..51b9825998 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -205,4 +205,6 @@ class ExportQueryExecutionTrees { ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial); FRIEND_TEST(ExportQueryExecutionTrees, 
ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial); + FRIEND_TEST(ExportQueryExecutionTrees, + ensureGeneratorIsNotConsumedWhenNotRequired); }; diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 4df58591e7..aad14d4b1f 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1368,3 +1368,49 @@ TEST(ExportQueryExecutionTrees, ++iterator; EXPECT_EQ(iterator, generator.end()); } + +// _____________________________________________________________________________ +TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) { + { + auto throwingGenerator = []() -> cppcoro::generator { + ADD_FAILURE() << "Generator was started" << std::endl; + throw std::runtime_error("Generator was started"); + co_return; + }(); + + Result result{std::move(throwingGenerator), {}, LocalVocab{}}; + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 0, ._offset = 0}, result); + EXPECT_NO_THROW({ + for ([[maybe_unused]] const auto& info : generator) { + } + }); + } + + { + auto throwAfterYieldGenerator = []() -> cppcoro::generator { + IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; + idTable1.push_back({Id::makeFromInt(1)}); + + co_yield std::move(idTable1); + + ADD_FAILURE() << "Generator was resumed" << std::endl; + throw std::runtime_error("Generator was resumed"); + }(); + + Result result{std::move(throwAfterYieldGenerator), {}, LocalVocab{}}; + auto generator = ExportQueryExecutionTrees::getRowIndices( + LimitOffsetClause{._limit = 1, ._offset = 0}, result); + bool executed = false; + EXPECT_NO_THROW({ + for (const auto& [idTable, range] : generator) { + for (uint64_t i : range) { + executed = true; + EXPECT_EQ(idTable.at(i)[0], Id::makeFromInt(1)); + } + } + }); + + EXPECT_TRUE(executed); + } +} From 21beae559c5e2cc670a5807d0fda9c7b329ab04d Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> 
Date: Sat, 17 Aug 2024 01:06:03 +0200 Subject: [PATCH 118/133] Fix sortedBy and is defined check for multiple `IdTable`s --- src/engine/Result.cpp | 83 +++++++++++++++++++++---------------------- src/engine/Result.h | 20 ++++------- 2 files changed, 48 insertions(+), 55 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 0eeca8bfd5..d3700e37df 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -6,7 +6,8 @@ #include "engine/Result.h" -#include "engine/LocalVocab.h" +#include + #include "util/CacheableGenerator.h" #include "util/Exception.h" #include "util/Log.h" @@ -42,7 +43,7 @@ Result::Result(IdTable idTable, std::vector sortedBy, sortedBy_{std::move(sortedBy)}, localVocab_{std::move(localVocab.localVocab_)} { AD_CONTRACT_CHECK(localVocab_ != nullptr); - validateIdTable(this->idTable(), sortedBy_); + assertSortOrderIsRespected(this->idTable(), sortedBy_); } // _____________________________________________________________________________ @@ -57,8 +58,16 @@ Result::Result(cppcoro::generator idTables, SharedLocalVocabWrapper localVocab) : data_{GenContainer{ [](auto idTables, auto sortedBy) -> cppcoro::generator { + std::optional previousId = std::nullopt; for (IdTable& idTable : idTables) { - validateIdTable(idTable, sortedBy); + if (idTable.size() > 0) { + if (previousId.has_value()) { + AD_EXPENSIVE_CHECK(!compareRowsByJoinColumns(sortedBy)( + idTable.at(0), previousId.value())); + } + previousId = idTable.at(idTable.size() - 1); + } + assertSortOrderIsRespected(idTable, sortedBy); co_yield std::move(idTable); } }(std::move(idTables), sortedBy)}}, @@ -156,33 +165,17 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { } } -// _____________________________________________________________________________ -auto Result::computeDatatypeCountsPerColumn(IdTable& idTable) - -> DatatypeCountsPerColumn { - DatatypeCountsPerColumn types; - types.resize(idTable.numColumns()); - for (size_t i = 0; i < 
idTable.numColumns(); ++i) { - const auto& col = idTable.getColumn(i); - auto& datatypes = types.at(i); - for (Id id : col) { - ++datatypes[static_cast(id.getDatatype())]; - } - } - return types; -} - // _____________________________________________________________ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto performCheck = [](const auto& map, IdTable& idTable) { - DatatypeCountsPerColumn datatypeCountsPerColumn = - computeDatatypeCountsPerColumn(idTable); return std::ranges::all_of(map, [&](const auto& varAndCol) { const auto& [columnIndex, mightContainUndef] = varAndCol.second; - bool hasUndefined = - datatypeCountsPerColumn.at(columnIndex) - .at(static_cast(Datatype::Undefined)) != 0; - return mightContainUndef == ColumnIndexAndTypeInfo::PossiblyUndefined || - !hasUndefined; + if (mightContainUndef == ColumnIndexAndTypeInfo::AlwaysDefined) { + return std::ranges::all_of(idTable.getColumn(columnIndex), [](Id id) { + return id.getDatatype() != Datatype::Undefined; + }); + } + return true; }); }; if (isFullyMaterialized()) { @@ -219,9 +212,9 @@ void Result::runOnNewChunkComputed( [&onGeneratorFinished]() { onGeneratorFinished(false); }}; try { ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; - for (auto&& idTable : original) { + for (IdTable& idTable : original) { onNewChunk(idTable, timer.value()); - co_yield std::move(idTable); + co_yield idTable; timer.start(); } } catch (...) 
{ @@ -235,22 +228,28 @@ void Result::runOnNewChunkComputed( } // _____________________________________________________________________________ -void Result::validateIdTable(const IdTable& idTable, - const std::vector& sortedBy) { - AD_CONTRACT_CHECK(std::ranges::all_of(sortedBy, [&idTable](size_t numCols) { - return numCols < idTable.numColumns(); - })); +auto Result::compareRowsByJoinColumns( + const std::vector& sortedBy) { + return [&sortedBy](const auto& row1, const auto& row2) { + for (ColumnIndex col : sortedBy) { + if (row1[col] != row2[col]) { + return row1[col] < row2[col]; + } + } + return false; + }; +} - [[maybe_unused]] auto compareRowsByJoinColumns = - [&sortedBy](const auto& row1, const auto& row2) { - for (size_t col : sortedBy) { - if (row1[col] != row2[col]) { - return row1[col] < row2[col]; - } - } - return false; - }; - AD_EXPENSIVE_CHECK(std::ranges::is_sorted(idTable, compareRowsByJoinColumns)); +// _____________________________________________________________________________ +void Result::assertSortOrderIsRespected( + const IdTable& idTable, const std::vector& sortedBy) { + AD_CONTRACT_CHECK( + std::ranges::all_of(sortedBy, [&idTable](ColumnIndex colIndex) { + return colIndex < idTable.numColumns(); + })); + + AD_EXPENSIVE_CHECK( + std::ranges::is_sorted(idTable, compareRowsByJoinColumns(sortedBy))); } // _____________________________________________________________________________ diff --git a/src/engine/Result.h b/src/engine/Result.h index 56a08bafe9..bd861f5d6f 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -15,7 +15,6 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "parser/data/LimitOffsetClause.h" -#include "util/CacheableGenerator.h" // The result of an `Operation`. This is the class QLever uses for all // intermediate or final results when processing a SPARQL query. 
The actual data @@ -74,19 +73,14 @@ class Result { std::make_shared(std::move(localVocab))} {} }; - // For each column in the result (the entries in the outer `vector`) and for - // each `Datatype` (the entries of the inner `array`), store the information - // how many entries of that datatype are stored in the column. - using DatatypeCountsPerColumn = std::vector< - std::array(Datatype::MaxValue) + 1>>; + // Helper function for `assertSortOrderIsRespected` that returns a lambda that + // ensures the sorting order based on `sortedBy`. + static auto compareRowsByJoinColumns( + const std::vector& sortedBy); - // Get the information, which columns stores how many entries of each - // datatype. - static DatatypeCountsPerColumn computeDatatypeCountsPerColumn( - IdTable& idTable); - - static void validateIdTable(const IdTable& idTable, - const std::vector& sortedBy); + // Check if sort order promised by `sortedBy` is kept within `idTable`. + static void assertSortOrderIsRespected( + const IdTable& idTable, const std::vector& sortedBy); public: // Construct from the given arguments (see above) and check the following From eadbc01361389aaf6698b8c8037b4220fd8ac640 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 17 Aug 2024 01:17:12 +0200 Subject: [PATCH 119/133] Consistent separator comment length --- src/engine/Result.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index d3700e37df..8e818a0840 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -165,7 +165,7 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { } } -// _____________________________________________________________ +// _____________________________________________________________________________ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto performCheck = [](const auto& map, IdTable& idTable) { return 
std::ranges::all_of(map, [&](const auto& varAndCol) { From cb86d14b550fef1b4a6166065859356aa51f8b11 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sat, 17 Aug 2024 01:51:42 +0200 Subject: [PATCH 120/133] Reorder function to fix build --- src/engine/Result.cpp | 25 ++++++++++++------------- src/engine/Result.h | 5 ----- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 8e818a0840..6c5eb054de 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -36,6 +36,18 @@ auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) // _____________________________________________________________________________ LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } +// _____________________________________________________________________________ +auto compareRowsByJoinColumns(const std::vector& sortedBy) { + return [&sortedBy](const auto& row1, const auto& row2) { + for (ColumnIndex col : sortedBy) { + if (row1[col] != row2[col]) { + return row1[col] < row2[col]; + } + } + return false; + }; +} + // _____________________________________________________________________________ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab) @@ -227,19 +239,6 @@ void Result::runOnNewChunkComputed( data_.emplace(std::move(generator)); } -// _____________________________________________________________________________ -auto Result::compareRowsByJoinColumns( - const std::vector& sortedBy) { - return [&sortedBy](const auto& row1, const auto& row2) { - for (ColumnIndex col : sortedBy) { - if (row1[col] != row2[col]) { - return row1[col] < row2[col]; - } - } - return false; - }; -} - // _____________________________________________________________________________ void Result::assertSortOrderIsRespected( const IdTable& idTable, const std::vector& sortedBy) { diff --git a/src/engine/Result.h 
b/src/engine/Result.h index bd861f5d6f..6e865f6e4e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -73,11 +73,6 @@ class Result { std::make_shared(std::move(localVocab))} {} }; - // Helper function for `assertSortOrderIsRespected` that returns a lambda that - // ensures the sorting order based on `sortedBy`. - static auto compareRowsByJoinColumns( - const std::vector& sortedBy); - // Check if sort order promised by `sortedBy` is kept within `idTable`. static void assertSortOrderIsRespected( const IdTable& idTable, const std::vector& sortedBy); From f0434b34f7e1781ff7acee166dd96cfd8de48cdf Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 18 Aug 2024 01:59:05 +0200 Subject: [PATCH 121/133] Add unit tests for newly added `ConcurrentCache` features --- test/ConcurrentCacheTest.cpp | 126 ++++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index 8e4c023e1a..626b9b8bf9 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -2,7 +2,8 @@ // Chair of Algorithms and Data Structures. 
// Author: Johannes Kalmbach (kalmbacj@informatik.uni-freiburg.de) -#include +#include +#include #include #include @@ -14,9 +15,12 @@ #include "util/ConcurrentCache.h" #include "util/DefaultValueSizeGetter.h" #include "util/Timer.h" +#include "util/jthread.h" using namespace std::literals; using namespace std::chrono_literals; +using namespace ad_utility::memory_literals; +using ::testing::Pointee; class ConcurrentSignal { std::atomic_flag flag_; @@ -323,3 +327,123 @@ TEST(ConcurrentCache, cacheStatusToString) { static_cast(notInCacheAndNotComputed) + 1); EXPECT_ANY_THROW(toString(outOfBounds)); } + +// _____________________________________________________________________________ +TEST(ConcurrentCache, isNotCachedIfUnsuitable) { + SimpleConcurrentLruCache cache{}; + + cache.clearAll(); + + auto result = cache.computeOnce( + 0, []() { return "abc"; }, false, [](const auto&) { return false; }); + + EXPECT_EQ(cache.numNonPinnedEntries(), 0); + EXPECT_EQ(cache.numPinnedEntries(), 0); + EXPECT_THAT(result._resultPointer, Pointee("abc"s)); +} + +// _____________________________________________________________________________ +TEST(ConcurrentCache, isNotCachedIfUnsuitableWhenWaitingForPendingComputation) { + SimpleConcurrentLruCache cache{}; + + auto resultInProgress = std::make_shared< + ad_utility::ConcurrentCacheDetail::ResultInProgress>(); + + cache.clearAll(); + cache.getStorage().wlock()->_inProgress[0] = + std::pair(false, resultInProgress); + + std::atomic_bool finished = false; + + ad_utility::JThread thread{[&]() { + std::this_thread::sleep_for(5ms); + resultInProgress->finish(nullptr); + finished = true; + }}; + + auto result = cache.computeOnce( + 0, []() { return "abc"; }, false, [](const auto&) { return false; }); + + EXPECT_TRUE(finished); + EXPECT_EQ(cache.numNonPinnedEntries(), 0); + EXPECT_EQ(cache.numPinnedEntries(), 0); + EXPECT_THAT(result._resultPointer, Pointee("abc"s)); +} + +// 
_____________________________________________________________________________ +TEST(ConcurrentCache, ifUnsuitableForCacheAndPinnedThrowsException) { + SimpleConcurrentLruCache cache{}; + + cache.clearAll(); + + EXPECT_THROW( + cache.computeOncePinned( + 0, []() { return "abc"; }, false, [](const auto&) { return false; }), + ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(ConcurrentCache, + ifUnsuitableWhenWaitingForPendingComputationAndPinnedThrowsException) { + SimpleConcurrentLruCache cache{}; + + auto resultInProgress = std::make_shared< + ad_utility::ConcurrentCacheDetail::ResultInProgress>(); + + cache.clearAll(); + cache.getStorage().wlock()->_inProgress[0] = + std::pair(false, resultInProgress); + + std::atomic_bool finished = false; + + ad_utility::JThread thread{[&]() { + std::this_thread::sleep_for(5ms); + resultInProgress->finish(nullptr); + finished = true; + }}; + + EXPECT_THROW( + cache.computeOncePinned( + 0, []() { return "abc"; }, false, [](const auto&) { return false; }), + ad_utility::Exception); + EXPECT_TRUE(finished); +} + +// _____________________________________________________________________________ +TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { + SimpleConcurrentLruCache cache{}; + + cache.tryInsertIfNotPresent(false, 0, std::make_shared("abc")); + + auto value = cache.getIfContained(0); + ASSERT_NE(value, std::nullopt); + EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); + EXPECT_NE(cache.nonPinnedSize(), 0_B); + EXPECT_EQ(cache.pinnedSize(), 0_B); + + cache.tryInsertIfNotPresent(false, 0, std::make_shared("def")); + + value = cache.getIfContained(0); + ASSERT_NE(value, std::nullopt); + EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); + EXPECT_NE(cache.nonPinnedSize(), 0_B); + EXPECT_EQ(cache.pinnedSize(), 0_B); + + cache.tryInsertIfNotPresent(true, 0, std::make_shared("ghi")); + + value = cache.getIfContained(0); + 
ASSERT_NE(value, std::nullopt); + EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); + EXPECT_EQ(cache.nonPinnedSize(), 0_B); + EXPECT_NE(cache.pinnedSize(), 0_B); + + cache.clearAll(); + + cache.tryInsertIfNotPresent(true, 0, std::make_shared("jkl")); + + value = cache.getIfContained(0); + ASSERT_NE(value, std::nullopt); + EXPECT_THAT(value.value()._resultPointer, Pointee("jkl"s)); + EXPECT_EQ(cache.nonPinnedSize(), 0_B); + EXPECT_NE(cache.pinnedSize(), 0_B); +} From d181a9fe2c1b848d2d13e359f55b53228ccbf250 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 18 Aug 2024 01:59:28 +0200 Subject: [PATCH 122/133] Add unit tests for lazy index scans --- test/engine/IndexScanTest.cpp | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index abc5babd38..315f429a7d 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -444,3 +444,44 @@ TEST(IndexScan, getResultSizeOfScan) { ASSERT_EQ(res.idTable().numColumns(), 0); } } + +// _____________________________________________________________________________ +TEST(IndexScan, computeResultCanBeConsumedLazily) { + using V = Variable; + auto qec = getQec("

, . .", true, false); + auto getId = makeGetId(qec->getIndex()); + auto x = getId(""); + auto p = getId("

"); + auto s1 = getId(""); + auto s2 = getId(""); + auto p2 = getId(""); + SparqlTripleSimple scanTriple{V{"?x"}, V{"?y"}, V{"?z"}}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + + ProtoResult result = scan.computeResultOnlyForTesting(true); + + ASSERT_FALSE(result.isFullyMaterialized()); + + std::vector resultValues; + + for (IdTable& idTable : result.idTables()) { + for (IdTable::row_type row : idTable) { + resultValues.push_back(row); + } + } + + ASSERT_EQ(resultValues.size(), 3); + ASSERT_EQ(resultValues[0].numColumns(), 3); + ASSERT_EQ(resultValues[1].numColumns(), 3); + ASSERT_EQ(resultValues[2].numColumns(), 3); + + EXPECT_EQ(resultValues[0][2], x); + EXPECT_EQ(resultValues[0][0], p); + EXPECT_EQ(resultValues[0][1], s1); + EXPECT_EQ(resultValues[1][2], x); + EXPECT_EQ(resultValues[1][0], p); + EXPECT_EQ(resultValues[1][1], s2); + EXPECT_EQ(resultValues[2][2], x); + EXPECT_EQ(resultValues[2][0], p2); + EXPECT_EQ(resultValues[2][1], s1); +} From 68dcb6f19340cb26af102f20507db94c6930c3ce Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 18 Aug 2024 02:02:55 +0200 Subject: [PATCH 123/133] Fix wrong filter calculations --- src/engine/ExportQueryExecutionTrees.cpp | 3 +- src/engine/Operation.cpp | 36 +++++++++++++++++------- src/engine/Operation.h | 7 +++++ src/engine/Result.cpp | 11 ++++---- src/engine/Result.h | 6 ++-- src/engine/RuntimeInformation.cpp | 2 -- src/engine/RuntimeInformation.h | 8 +++--- test/RuntimeInformationTest.cpp | 14 ++++----- 8 files changed, 55 insertions(+), 32 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 4bda95d994..80af2793b3 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -705,8 +705,7 @@ nlohmann::json ExportQueryExecutionTrees::computeQueryResultAsQLeverJSON( j["runtimeInformation"]["meta"] = nlohmann::ordered_json( 
qet.getRootOperation()->getRuntimeInfoWholeQuery()); RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo(); - runtimeInformation.addLimitOffsetRow( - query._limitOffset, std::chrono::milliseconds::zero(), false); + runtimeInformation.addLimitOffsetRow(query._limitOffset, false); j["runtimeInformation"]["query_execution_tree"] = nlohmann::ordered_json(runtimeInformation); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index e35938ac24..1dd355b72a 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -69,6 +69,23 @@ void Operation::recursivelySetTimeConstraint( }); } +// _____________________________________________________________________________ +void Operation::updateRuntimeStats(bool applyToFilter, uint64_t numRows, + uint64_t numCols, + std::chrono::milliseconds duration) { + auto& rti = applyToFilter || !externalFilterApplied_ + ? runtimeInfo() + : *runtimeInfo().children_.at(0); + rti.totalTime_ += duration; + rti.originalOperationTime_ = rti.getOperationTime(); + rti.numRows_ += numRows; + rti.numCols_ = numCols; + if (!applyToFilter && externalFilterApplied_) { + runtimeInfo().totalTime_ += duration; + runtimeInfo().originalOperationTime_ = rti.getOperationTime(); + } +} + // _____________________________________________________________________________ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, ComputationMode computationMode) { @@ -110,12 +127,9 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, timeSizeUpdate += duration; auto msPrecision = std::chrono::duration_cast(overlap); - runtimeInfo().totalTime_ += msPrecision; + updateRuntimeStats(false, idTable.numRows(), idTable.numColumns(), + msPrecision); overlap -= msPrecision; - runtimeInfo().originalOperationTime_ = - runtimeInfo().getOperationTime(); - runtimeInfo().numRows_ += idTable.numRows(); - runtimeInfo().numCols_ = idTable.numColumns(); LOG(DEBUG) << "Computed partial chunk of size " << 
idTable.numRows() << " x " << idTable.numColumns() << std::endl; if (timeSizeUpdate > 50ms) { @@ -139,16 +153,18 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // export, allowing the cache to reuse the same operation for different // limits and offsets. if (!supportsLimit()) { - runtimeInfo().addLimitOffsetRow(_limit, std::chrono::milliseconds{0}, true); + runtimeInfo().addLimitOffsetRow(_limit, true); + AD_CONTRACT_CHECK(!externalFilterApplied_); + externalFilterApplied_ = _limit._limit.has_value() || _limit._offset != 0; result.applyLimitOffset( - _limit, [runtimeInfo = getRuntimeInfoPointer(), - overlap = 0us](std::chrono::microseconds limitTime) mutable { + _limit, [this, overlap = 0us](std::chrono::microseconds limitTime, + const IdTable& idTable) mutable { overlap += limitTime; auto msPrecision = std::chrono::duration_cast(overlap); - runtimeInfo->totalTime_ += msPrecision; + updateRuntimeStats(true, idTable.numRows(), idTable.numColumns(), + msPrecision); overlap -= msPrecision; - runtimeInfo->originalOperationTime_ = runtimeInfo->getOperationTime(); }); } else { result.assertThatLimitWasRespected(_limit); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 880151eaeb..8aa2592d70 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -260,6 +260,12 @@ class Operation { //! Compute the result of the query-subtree rooted at this element.. virtual ProtoResult computeResult(bool requestLaziness) = 0; + // Update the runtime information of this operation according to the given + // arguments, considering the possibility that the initial runtime information + // was replaced by calling `RuntimeInformation::addLimitOffsetRow`. + void updateRuntimeStats(bool applyToFilter, uint64_t numRows, + uint64_t numCols, std::chrono::milliseconds duration); + // Perform the expensive computation modeled by the subclass of this // `Operation`. The value provided by `computationMode` decides if lazy // results are preferred. 
It must not be `ONLY_IF_CACHED`, this will lead to @@ -373,4 +379,5 @@ class Operation { // Store the list of columns by which the result is sorted. mutable std::optional> _resultSortedColumns = std::nullopt; + bool externalFilterApplied_ = false; }; diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 6c5eb054de..454fad7fde 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -114,7 +114,8 @@ void resizeIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { // _____________________________________________________________________________ void Result::applyLimitOffset( const LimitOffsetClause& limitOffset, - std::function limitTimeCallback) { + std::function + limitTimeCallback) { // Apply the OFFSET clause. If the offset is `0` or the offset is larger // than the size of the `IdTable`, then this has no effect and runtime // `O(1)` (see the docs for `std::shift_left`). @@ -122,12 +123,12 @@ void Result::applyLimitOffset( if (isFullyMaterialized()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; resizeIdTable(std::get(data_), limitOffset); - limitTimeCallback(limitTimer.msecs()); + limitTimeCallback(limitTimer.msecs(), idTable()); } else { auto generator = [](cppcoro::generator original, LimitOffsetClause limitOffset, - std::function limitTimeCallback) - -> cppcoro::generator { + std::function + limitTimeCallback) -> cppcoro::generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } @@ -141,7 +142,7 @@ void Result::applyLimitOffset( limitOffset._limit.value() -= limitOffset.actualSize(originalSize - offsetDelta); } - limitTimeCallback(limitTimer.value()); + limitTimeCallback(limitTimer.value(), idTable); if (limitOffset._offset == 0) { co_yield idTable; } diff --git a/src/engine/Result.h b/src/engine/Result.h index 6e865f6e4e..ff68ec161e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -199,13 +199,15 @@ class Result { // This also applies if `data_` holds a generator yielding 
`IdTable`s, where // this is applied respectively. // `limitTimeCallback` is called whenever an `IdTable` is resized with the - // number of microseconds it took to perform this operation. + // number of microseconds it took to perform this operation and the freshly + // resized `IdTable` as const reference. // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those are still correct after performing this operation. void applyLimitOffset( const LimitOffsetClause& limitOffset, - std::function limitTimeCallback); + std::function + limitTimeCallback); // Check if the operation did fulfill its contract and only returns as many // elements as requested by the provided `limitOffset`. Throw an diff --git a/src/engine/RuntimeInformation.cpp b/src/engine/RuntimeInformation.cpp index fd3218194f..a8724f71a1 100644 --- a/src/engine/RuntimeInformation.cpp +++ b/src/engine/RuntimeInformation.cpp @@ -219,7 +219,6 @@ void to_json(nlohmann::ordered_json& j, // __________________________________________________________________________ void RuntimeInformation::addLimitOffsetRow(const LimitOffsetClause& l, - Milliseconds timeForLimit, bool fullResultIsNotCached) { bool hasLimit = l._limit.has_value(); bool hasOffset = l._offset != 0; @@ -233,7 +232,6 @@ void RuntimeInformation::addLimitOffsetRow(const LimitOffsetClause& l, numRows_ = l.actualSize(actualOperation->numRows_); details_.clear(); cacheStatus_ = ad_utility::CacheStatus::computed; - totalTime_ += timeForLimit; actualOperation->addDetail("not-written-to-cache-because-child-of-limit", fullResultIsNotCached); actualOperation->eraseDetail("limit"); diff --git a/src/engine/RuntimeInformation.h b/src/engine/RuntimeInformation.h index f791c0cd63..1ba24d658b 100644 --- a/src/engine/RuntimeInformation.h +++ b/src/engine/RuntimeInformation.h @@ -129,10 +129,10 @@ class RuntimeInformation { // Set the runtime information for a LIMIT or 
OFFSET operation as the new root // of the tree and make the old root the only child of the LIMIT operation. - // The details of the LIMIT/OFFSET, the time (in ms) that was spent computing - // it, and the information whether the `actual` operation (the old root of the - // runtime information) is written to the cache, are passed in as arguments. - void addLimitOffsetRow(const LimitOffsetClause& l, Milliseconds timeForLimit, + // The details of the LIMIT/OFFSET and the information whether the `actual` + // operation (the old root of the runtime information) is written to the + // cache, are passed in as arguments. + void addLimitOffsetRow(const LimitOffsetClause& l, bool fullResultIsNotCached); static std::string_view toString(Status status); diff --git a/test/RuntimeInformationTest.cpp b/test/RuntimeInformationTest.cpp index 9e0acc0b73..4a32c6f62f 100644 --- a/test/RuntimeInformationTest.cpp +++ b/test/RuntimeInformationTest.cpp @@ -16,22 +16,22 @@ TEST(RuntimeInformation, addLimitOffsetRow) { rti.totalTime_ = 4ms; rti.sizeEstimate_ = 34; - rti.addLimitOffsetRow(LimitOffsetClause{}, 5ms, true); + rti.addLimitOffsetRow(LimitOffsetClause{}, true); EXPECT_FALSE( rti.details_.contains("not-written-to-cache-because-child-of-limit")); EXPECT_FALSE( rti.details_.contains("executed-implicitly-during-query-export")); - rti.addLimitOffsetRow(LimitOffsetClause{}, 5ms, false); + rti.addLimitOffsetRow(LimitOffsetClause{}, false); EXPECT_FALSE( rti.details_.contains("not-written-to-cache-because-child-of-limit")); EXPECT_FALSE( rti.details_.contains("executed-implicitly-during-query-export")); - rti.addLimitOffsetRow(LimitOffsetClause{23, 4, 1}, 20ms, true); + rti.addLimitOffsetRow(LimitOffsetClause{23, 4, 1}, true); EXPECT_EQ(rti.descriptor_, "LIMIT 23 OFFSET 4"); - EXPECT_EQ(rti.totalTime_, 24ms); - EXPECT_EQ(rti.getOperationTime(), 20ms); + EXPECT_EQ(rti.totalTime_, 4ms); + EXPECT_EQ(rti.getOperationTime(), 0ms); ASSERT_EQ(rti.children_.size(), 1u); auto& child = 
*rti.children_.at(0); @@ -41,13 +41,13 @@ TEST(RuntimeInformation, addLimitOffsetRow) { EXPECT_TRUE(child.details_.at("not-written-to-cache-because-child-of-limit")); EXPECT_FALSE(rti.details_.at("executed-implicitly-during-query-export")); - rti.addLimitOffsetRow(LimitOffsetClause{std::nullopt, 17, 1}, 15ms, false); + rti.addLimitOffsetRow(LimitOffsetClause{std::nullopt, 17, 1}, false); EXPECT_FALSE(rti.children_.at(0)->details_.at( "not-written-to-cache-because-child-of-limit")); EXPECT_TRUE(rti.details_.at("executed-implicitly-during-query-export")); EXPECT_EQ(rti.descriptor_, "OFFSET 17"); - rti.addLimitOffsetRow(LimitOffsetClause{42, 0, 1}, 15ms, true); + rti.addLimitOffsetRow(LimitOffsetClause{42, 0, 1}, true); EXPECT_EQ(rti.descriptor_, "LIMIT 42"); } From 6c785b29d595fdb2fdeb1d7889c48cefaf563b66 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Sun, 18 Aug 2024 21:50:39 +0200 Subject: [PATCH 124/133] Add tests for new `Operation` functionality --- src/engine/Operation.cpp | 14 +- src/engine/Operation.h | 13 + test/CMakeLists.txt | 2 +- test/FilterTest.cpp | 73 +----- test/OperationTest.cpp | 424 ++++++++++++++++++++++++++++++- test/engine/ValuesForTesting.h | 88 +++++-- test/util/OperationTestHelpers.h | 50 ++++ 7 files changed, 575 insertions(+), 89 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 1dd355b72a..6214bafcd3 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -77,12 +77,16 @@ void Operation::updateRuntimeStats(bool applyToFilter, uint64_t numRows, ? 
runtimeInfo() : *runtimeInfo().children_.at(0); rti.totalTime_ += duration; + rti.originalTotalTime_ = rti.totalTime_; rti.originalOperationTime_ = rti.getOperationTime(); - rti.numRows_ += numRows; - rti.numCols_ = numCols; + if (!applyToFilter || externalFilterApplied_) { + rti.numRows_ += numRows; + rti.numCols_ = numCols; + } if (!applyToFilter && externalFilterApplied_) { runtimeInfo().totalTime_ += duration; - runtimeInfo().originalOperationTime_ = rti.getOperationTime(); + runtimeInfo().originalTotalTime_ = runtimeInfo().totalTime_; + runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); } } @@ -191,10 +195,12 @@ CacheValue Operation::runComputationAndPrepareForCache( }, [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey](Result aggregatedResult) { + auto copy = *runtimeInfo; + copy.status_ = RuntimeInformation::Status::fullyMaterialized; cache.tryInsertIfNotPresent( false, cacheKey, std::make_shared(std::move(aggregatedResult), - *runtimeInfo)); + std::move(copy))); }); } if (result.isFullyMaterialized()) { diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 8aa2592d70..fe8b5cd8af 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -6,6 +6,8 @@ #pragma once +#include + #include #include @@ -380,4 +382,15 @@ class Operation { mutable std::optional> _resultSortedColumns = std::nullopt; bool externalFilterApplied_ = false; + FRIEND_TEST(Operation, updateRuntimeStatsWorksCorrectly); + FRIEND_TEST(Operation, verifyRuntimeInformationIsUpdatedForLazyOperations); + FRIEND_TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException); + FRIEND_TEST(Operation, testSubMillisecondsIncrementsAreStillTracked); + FRIEND_TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd); + FRIEND_TEST(Operation, + ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption); + FRIEND_TEST(Operation, + verifyLimitIsProperlyAppliedAndUpdatesRuntimeInfoCorrectly); + FRIEND_TEST(Operation, 
ensureLazyOperationIsCachedIfSmallEnough); + FRIEND_TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge); }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cc859dbcf5..4da4ff119a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -397,4 +397,4 @@ addLinkAndDiscoverTest(CopyableSynchronizationTest) addLinkAndDiscoverTest(CacheableGeneratorTest) -addLinkAndDiscoverTest(FilterTest) +addLinkAndDiscoverTest(FilterTest engine) diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 4e40d68989..5fff02c8cf 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -11,70 +11,7 @@ using ::testing::ElementsAre; -class LazyValueOperation : public Operation { - public: - std::vector getChildren() override { return {}; } - string getDescriptor() const override { return "Descriptor"; } - size_t getResultWidth() const override { return 0; } - size_t getCostEstimate() override { return 0; } - uint64_t getSizeEstimateBeforeLimit() override { return 0; } - float getMultiplicity(size_t) override { return 1; } - bool knownEmptyResult() override { return false; } - [[nodiscard]] vector resultSortedOn() const override { - return {}; - } - VariableToColumnMap computeVariableToColumnMap() const override { - return {{Variable{"?x"}, - ColumnIndexAndTypeInfo{ - 0, ColumnIndexAndTypeInfo::UndefStatus::AlwaysDefined}}}; - } - - std::vector idTables_; - - explicit LazyValueOperation(QueryExecutionContext* qec, - std::vector idTables) - : Operation{qec}, idTables_{std::move(idTables)} { - AD_CONTRACT_CHECK(!idTables_.empty()); - } - - string getCacheKeyImpl() const override { - std::ostringstream stream; - for (const IdTable& idTable : idTables_) { - for (const auto& row : idTable) { - stream << "{ "; - for (const auto& cell : row) { - stream << cell << ' '; - } - stream << "}\n"; - } - } - return std::move(stream).str(); - } - - ProtoResult computeResult(bool requestLaziness) override { - if (requestLaziness) { - std::vector clones; - 
clones.reserve(idTables_.size()); - for (const IdTable& idTable : idTables_) { - clones.push_back(idTable.clone()); - } - auto generator = [](auto idTables) -> cppcoro::generator { - for (IdTable& idTable : idTables) { - co_yield std::move(idTable); - } - }(std::move(clones)); - return {std::move(generator), resultSortedOn(), LocalVocab{}}; - } - IdTable aggregateTable{idTables_.at(0).numColumns(), - idTables_.at(0).getAllocator()}; - for (const IdTable& idTable : idTables_) { - aggregateTable.insertAtEnd(idTable); - } - return {std::move(aggregateTable), resultSortedOn(), LocalVocab{}}; - } -}; - -IdTable makeIdTable(std::initializer_list bools) { +IdTable makeIdTable(std::vector bools) { IdTable idTable{1, ad_utility::makeUnlimitedAllocator()}; for (bool b : bools) { idTable.push_back({Id::makeFromBool(b)}); @@ -99,9 +36,9 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { idTables.push_back(makeIdTable({false, false, false})); idTables.push_back(makeIdTable({true})); - LazyValueOperation values{qec, std::move(idTables)}; + ValuesForTesting values{qec, std::move(idTables), {Variable{"?x"}}}; QueryExecutionTree subTree{ - qec, std::make_shared(std::move(values))}; + qec, std::make_shared(std::move(values))}; Filter filter{ qec, std::make_shared(std::move(subTree)), @@ -148,9 +85,9 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { idTables.push_back(makeIdTable({false, false, false})); idTables.push_back(makeIdTable({true})); - LazyValueOperation values{qec, std::move(idTables)}; + ValuesForTesting values{qec, std::move(idTables), {Variable{"?x"}}}; QueryExecutionTree subTree{ - qec, std::make_shared(std::move(values))}; + qec, std::make_shared(std::move(values))}; Filter filter{ qec, std::make_shared(std::move(subTree)), diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 9e9d47571a..c45d34f40a 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -3,7 +3,6 @@ // Author: Johannes Kalmbach 
(joka921) #include -#include #include "engine/NeutralElementOperation.h" #include "engine/ValuesForTesting.h" @@ -221,3 +220,426 @@ TEST(Operation, createRuntimInfoFromEstimates) { EXPECT_EQ(operation.runtimeInfo().details_["limit"], 12); EXPECT_EQ(operation.runtimeInfo().details_["offset"], 3); } + +// _____________________________________________________________________________ +TEST(Operation, lazilyEvaluatedOperationIsNotCached) { + using V = Variable; + auto qec = getQec(); + SparqlTripleSimple scanTriple{V{"?x"}, V{"?y"}, V{"?z"}}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + + qec->getQueryTreeCache().clearAll(); + auto result = scan.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + ASSERT_NE(result, nullptr); + EXPECT_FALSE(result->isFullyMaterialized()); + + EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 0); + EXPECT_EQ(qec->getQueryTreeCache().numPinnedEntries(), 0); +} + +// _____________________________________________________________________________ +TEST(Operation, updateRuntimeStatsWorksCorrectly) { + auto qec = getQec(); + auto idTable = makeIdTableFromVector({{3, 4}, {7, 8}, {9, 123}}); + ValuesForTesting valuesForTesting{ + qec, std::move(idTable), {Variable{"?x"}, Variable{"?y"}}}; + + auto& rti = valuesForTesting.runtimeInfo(); + + // Test operation with built-in filter + valuesForTesting.externalFilterApplied_ = false; + valuesForTesting.updateRuntimeStats(false, 11, 13, 17ms); + + EXPECT_EQ(rti.numCols_, 13); + EXPECT_EQ(rti.numRows_, 11); + EXPECT_EQ(rti.totalTime_, 17ms); + EXPECT_EQ(rti.originalTotalTime_, 17ms); + EXPECT_EQ(rti.originalOperationTime_, 17ms); + + // Test built-in filter + valuesForTesting.externalFilterApplied_ = false; + valuesForTesting.updateRuntimeStats(true, 5, 3, 7ms); + + EXPECT_EQ(rti.numCols_, 13); + EXPECT_EQ(rti.numRows_, 11); + EXPECT_EQ(rti.totalTime_, 17ms + 7ms); + EXPECT_EQ(rti.originalTotalTime_, 17ms + 7ms); + EXPECT_EQ(rti.originalOperationTime_, 17ms + 7ms); + + 
rti.children_ = {std::make_shared()}; + rti.numCols_ = 0; + rti.numRows_ = 0; + rti.totalTime_ = 0ms; + rti.originalOperationTime_ = 0ms; + auto& childRti = *rti.children_.at(0); + + // Test operation with external filter + valuesForTesting.externalFilterApplied_ = true; + valuesForTesting.updateRuntimeStats(false, 31, 37, 41ms); + + EXPECT_EQ(rti.numCols_, 0); + EXPECT_EQ(rti.numRows_, 0); + EXPECT_EQ(rti.totalTime_, 41ms); + EXPECT_EQ(rti.originalTotalTime_, 41ms); + EXPECT_EQ(rti.originalOperationTime_, 0ms); + + EXPECT_EQ(childRti.numCols_, 37); + EXPECT_EQ(childRti.numRows_, 31); + EXPECT_EQ(childRti.totalTime_, 41ms); + EXPECT_EQ(childRti.originalTotalTime_, 41ms); + EXPECT_EQ(childRti.originalOperationTime_, 41ms); + + // Test external filter + valuesForTesting.externalFilterApplied_ = true; + valuesForTesting.updateRuntimeStats(true, 19, 23, 29ms); + + EXPECT_EQ(rti.numCols_, 23); + EXPECT_EQ(rti.numRows_, 19); + EXPECT_EQ(rti.totalTime_, 41ms + 29ms); + EXPECT_EQ(rti.originalTotalTime_, 41ms + 29ms); + EXPECT_EQ(rti.originalOperationTime_, 29ms); + + EXPECT_EQ(childRti.numCols_, 37); + EXPECT_EQ(childRti.numRows_, 31); + EXPECT_EQ(childRti.totalTime_, 41ms); + EXPECT_EQ(childRti.originalTotalTime_, 41ms); + EXPECT_EQ(childRti.originalOperationTime_, 41ms); +} + +// _____________________________________________________________________________ +TEST(Operation, verifyRuntimeInformationIsUpdatedForLazyOperations) { + auto qec = getQec(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}}; + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + EXPECT_THROW( + valuesForTesting.runComputation(timer, ComputationMode::ONLY_IF_CACHED), + ad_utility::Exception); + + auto result = valuesForTesting.runComputation( + timer, 
ComputationMode::LAZY_IF_SUPPORTED); + + EXPECT_EQ(valuesForTesting.runtimeInfo().status_, + RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(valuesForTesting.runtimeInfo().totalTime_, 0ms); + EXPECT_EQ(valuesForTesting.runtimeInfo().originalTotalTime_, 0ms); + EXPECT_EQ(valuesForTesting.runtimeInfo().originalOperationTime_, 0ms); + + auto& idTables = result.idTables(); + + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().status_, + RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().status_, + RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 2); + + ++iterator; + ASSERT_EQ(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().status_, + RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 2); +} + +// _____________________________________________________________________________ +TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException) { + bool signaledUpdate = false; + Index index = makeTestIndex( + "ensureFailedStatusIsSetWhenGeneratorThrowsException", std::nullopt, true, + true, true, ad_utility::MemorySize::bytes(16), false); + QueryResultCache cache{}; + QueryExecutionContext context{ + index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), + SortPerformanceEstimator{}, [&](std::string) { signaledUpdate = true; }}; + AlwaysFailLazyOperation operation{&context}; + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + auto result = + operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); + 
+ EXPECT_EQ(operation.runtimeInfo().status_, + RuntimeInformation::Status::lazilyMaterialized); + + EXPECT_THROW(result.idTables().begin(), std::runtime_error); + + EXPECT_EQ(operation.runtimeInfo().status_, + RuntimeInformation::Status::failed); + EXPECT_TRUE(signaledUpdate); +} + +// _____________________________________________________________________________ +TEST(Operation, testSubMillisecondsIncrementsAreStillTracked) { +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); +#endif + auto idTable = makeIdTableFromVector({{}}); + CustomGeneratorOperation operation{ + getQec(), [](const IdTable& idTable) -> cppcoro::generator { + std::this_thread::sleep_for(300us); + co_yield idTable.clone(); + std::this_thread::sleep_for(300us); + co_yield idTable.clone(); + std::this_thread::sleep_for(500us); + co_yield idTable.clone(); + }(idTable)}; + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + auto result = + operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); + + EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); + + auto& idTables = result.idTables(); + + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); + EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(operation.runtimeInfo().totalTime_, 1ms); + EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 1ms); + EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 
1ms); + + ++iterator; + ASSERT_EQ(iterator, idTables.end()); +} + +// _____________________________________________________________________________ +TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) { +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); +#endif + uint32_t updateCallCounter = 0; + auto idTable = makeIdTableFromVector({{}}); + Index index = makeTestIndex( + "ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd", std::nullopt, true, + true, true, ad_utility::MemorySize::bytes(16), false); + QueryResultCache cache{}; + QueryExecutionContext context{ + index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), + SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; + CustomGeneratorOperation operation{ + &context, [](const IdTable& idTable) -> cppcoro::generator { + std::this_thread::sleep_for(50ms); + co_yield idTable.clone(); + // This one should not trigger because it's below the 50ms threshold + std::this_thread::sleep_for(30ms); + co_yield idTable.clone(); + std::this_thread::sleep_for(30ms); + co_yield idTable.clone(); + // This one should not trigger directly, but trigger because it's the + // last one + std::this_thread::sleep_for(30ms); + co_yield idTable.clone(); + }(idTable)}; + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + auto result = + operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); + + EXPECT_EQ(updateCallCounter, 1); + + auto& idTables = result.idTables(); + + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(updateCallCounter, 2); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(updateCallCounter, 2); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(updateCallCounter, 3); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(updateCallCounter, 3); + + ++iterator; + ASSERT_EQ(iterator, idTables.end()); + 
EXPECT_EQ(updateCallCounter, 4); +} + +// _____________________________________________________________________________ +TEST(Operation, ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption) { + uint32_t updateCallCounter = 0; + auto idTable = makeIdTableFromVector({{}}); + Index index = makeTestIndex( + "ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption", std::nullopt, + true, true, true, ad_utility::MemorySize::bytes(16), false); + QueryResultCache cache{}; + QueryExecutionContext context{ + index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), + SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; + CustomGeneratorOperation operation{ + &context, [](const IdTable& idTable) -> cppcoro::generator { + co_yield idTable.clone(); + co_yield idTable.clone(); + }(idTable)}; + + { + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + auto result = + operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); + + EXPECT_EQ(updateCallCounter, 1); + auto& idTables = result.idTables(); + // Only consume partially + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + EXPECT_EQ(updateCallCounter, 1); + } + + // Destructor of result should call this function + EXPECT_EQ(updateCallCounter, 2); +} + +// _____________________________________________________________________________ +TEST(Operation, verifyLimitIsProperlyAppliedAndUpdatesRuntimeInfoCorrectly) { + auto qec = getQec(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}}; + + valuesForTesting.setLimit({._limit = 1, ._offset = 1}); + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + + auto result = valuesForTesting.runComputation( + timer, ComputationMode::LAZY_IF_SUPPORTED); + + 
EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 0); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 0); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 0); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 0); + + auto& idTables = result.idTables(); + + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 0); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 1); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 3); + + ++iterator; + ASSERT_EQ(iterator, idTables.end()); + + EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); + EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 3); +} + +// _____________________________________________________________________________ +TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { + auto qec = getQec(); + qec->getQueryTreeCache().clearAll(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}}; + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + + auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( + timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); + 
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); + + for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + } + + auto aggregatedValue = qec->getQueryTreeCache().getIfContained("test"); + ASSERT_TRUE(aggregatedValue.has_value()); + + ASSERT_TRUE(aggregatedValue.value()._resultPointer); + auto newRuntimeInfo = aggregatedValue.value()._resultPointer->runtimeInfo(); + auto& oldRuntimeInfo = valuesForTesting.runtimeInfo(); + EXPECT_EQ(newRuntimeInfo.descriptor_, oldRuntimeInfo.descriptor_); + EXPECT_EQ(newRuntimeInfo.numCols_, oldRuntimeInfo.numCols_); + EXPECT_EQ(newRuntimeInfo.numRows_, oldRuntimeInfo.numRows_); + EXPECT_EQ(newRuntimeInfo.totalTime_, oldRuntimeInfo.totalTime_); + EXPECT_EQ(newRuntimeInfo.originalTotalTime_, + oldRuntimeInfo.originalTotalTime_); + EXPECT_EQ(newRuntimeInfo.originalOperationTime_, + oldRuntimeInfo.originalOperationTime_); + EXPECT_EQ(newRuntimeInfo.status_, + RuntimeInformation::Status::fullyMaterialized); + + const auto& aggregatedResult = + aggregatedValue.value()._resultPointer->resultTable(); + ASSERT_TRUE(aggregatedResult.isFullyMaterialized()); + + const auto& idTable = aggregatedResult.idTable(); + ASSERT_EQ(idTable.numColumns(), 2); + ASSERT_EQ(idTable.numRows(), 3); + + EXPECT_EQ(idTable, makeIdTableFromVector({{3, 4}, {7, 8}, {9, 123}})); +} + +// _____________________________________________________________________________ +TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { + auto qec = getQec(); + qec->getQueryTreeCache().clearAll(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}}; + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + + auto originalSize = qec->getQueryTreeCache().getMaxSizeSingleEntry(); + + // Too small for storage + 
qec->getQueryTreeCache().setMaxSizeSingleEntry(1_B); + + auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( + timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); + qec->getQueryTreeCache().setMaxSizeSingleEntry(originalSize); + + for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + } + + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); +} diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index ac7e363a95..15f7f1f152 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -15,7 +15,7 @@ // operation. class ValuesForTesting : public Operation { private: - IdTable table_; + std::vector table_; std::vector> variables_; bool supportsLimit_; // Those can be manually overwritten for testing using the respective getters. @@ -33,15 +33,38 @@ class ValuesForTesting : public Operation { LocalVocab localVocab = LocalVocab{}, std::optional multiplicity = std::nullopt) : Operation{ctx}, - table_{std::move(table)}, + table_{}, variables_{std::move(variables)}, supportsLimit_{supportsLimit}, - sizeEstimate_{table_.numRows()}, - costEstimate_{table_.numRows()}, + sizeEstimate_{table.numRows()}, + costEstimate_{table.numRows()}, resultSortedColumns_{std::move(sortedColumns)}, localVocab_{std::move(localVocab)}, multiplicity_{multiplicity} { - AD_CONTRACT_CHECK(variables_.size() == table_.numColumns()); + AD_CONTRACT_CHECK(variables_.size() == table.numColumns()); + table_.push_back(std::move(table)); + } + explicit ValuesForTesting(QueryExecutionContext* ctx, + std::vector tables, + std::vector> variables) + : Operation{ctx}, + table_{std::move(tables)}, + variables_{std::move(variables)}, + supportsLimit_{false}, + sizeEstimate_{0}, + costEstimate_{0}, + resultSortedColumns_{}, + localVocab_{LocalVocab{}}, + multiplicity_{std::nullopt} { + AD_CONTRACT_CHECK(std::ranges::all_of(table_, [this](const 
IdTable& table) { + return variables_.size() == table.numColumns(); + })); + size_t totalRows = 0; + for (const IdTable& idTable : table_) { + totalRows += idTable.numRows(); + } + sizeEstimate_ = totalRows; + costEstimate_ = totalRows; } // Accessors for the estimates for manual testing. @@ -49,8 +72,33 @@ class ValuesForTesting : public Operation { size_t& costEstimate() { return costEstimate_; } // ___________________________________________________________________________ - ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { - auto table = table_.clone(); + ProtoResult computeResult(bool requestLaziness) override { + if (requestLaziness) { + // Not implemented yet + AD_CORRECTNESS_CHECK(!supportsLimit_); + std::vector clones; + clones.reserve(table_.size()); + for (const IdTable& idTable : table_) { + clones.push_back(idTable.clone()); + } + auto generator = [](auto idTables) -> cppcoro::generator { + for (IdTable& idTable : idTables) { + co_yield std::move(idTable); + } + }(std::move(clones)); + return {std::move(generator), resultSortedOn(), localVocab_.clone()}; + } + std::optional optionalTable; + if (table_.size() > 1) { + IdTable aggregateTable{table_.at(0).numColumns(), + table_.at(0).getAllocator()}; + for (const IdTable& idTable : table_) { + aggregateTable.insertAtEnd(idTable); + } + optionalTable = std::move(aggregateTable); + } + auto table = optionalTable.has_value() ? std::move(optionalTable).value() + : table_.at(0).clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), table.end()); @@ -65,14 +113,19 @@ class ValuesForTesting : public Operation { // ___________________________________________________________________________ string getCacheKeyImpl() const override { std::stringstream str; - str << "Values for testing with " << table_.numColumns() << " columns and " - << table_.numRows() << " rows. 
"; - if (table_.numRows() > 1000) { + auto numRowsView = table_ | std::views::transform(&IdTable::numRows); + auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0); + auto numCols = table_.empty() ? 0 : table_.at(0).numColumns(); + str << "Values for testing with " << numCols << " columns and " + << totalNumRows << " rows. "; + if (totalNumRows > 1000) { str << ad_utility::FastRandomIntGenerator{}(); } else { - for (size_t i = 0; i < table_.numColumns(); ++i) { - for (Id entry : table_.getColumn(i)) { - str << entry << ' '; + for (const IdTable& idTable : table_) { + for (size_t i = 0; i < idTable.numColumns(); ++i) { + for (Id entry : idTable.getColumn(i)) { + str << entry << ' '; + } } } } @@ -85,7 +138,9 @@ class ValuesForTesting : public Operation { return "explicit values for testing"; } - size_t getResultWidth() const override { return table_.numColumns(); } + size_t getResultWidth() const override { + return table_.empty() ? 0 : table_.at(0).numColumns(); + } vector resultSortedOn() const override { return resultSortedColumns_; @@ -117,7 +172,10 @@ class ValuesForTesting : public Operation { continue; } bool containsUndef = - ad_utility::contains(table_.getColumn(i), Id::makeUndefined()); + std::ranges::any_of(table_, [&i](const IdTable& table) { + return std::ranges::any_of(table.getColumn(i), + [](Id id) { return id.isUndefined(); }); + }); using enum ColumnIndexAndTypeInfo::UndefStatus; m[variables_.at(i).value()] = ColumnIndexAndTypeInfo{ i, containsUndef ? 
PossiblyUndefined : AlwaysDefined}; diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 24826902fe..f683c9a381 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -85,4 +85,54 @@ class ShallowParentOperation : public Operation { } }; +class AlwaysFailLazyOperation : public Operation { + std::vector getChildren() override { return {}; } + string getCacheKeyImpl() const override { return "AlwaysFailLazyOperation"; } + string getDescriptor() const override { + return "AlwaysFailLazyOperationDescriptor"; + } + size_t getResultWidth() const override { return 0; } + size_t getCostEstimate() override { return 0; } + uint64_t getSizeEstimateBeforeLimit() override { return 0; } + float getMultiplicity([[maybe_unused]] size_t) override { return 0; } + bool knownEmptyResult() override { return false; } + vector resultSortedOn() const override { return {}; } + VariableToColumnMap computeVariableToColumnMap() const override { return {}; } + + public: + using Operation::Operation; + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { + return {[]() -> cppcoro::generator { + throw std::runtime_error{"AlwaysFailLazyOperation"}; + // Required so that the exception only occurs within the generator + co_return; + }(), + resultSortedOn(), LocalVocab{}}; + } +}; + +class CustomGeneratorOperation : public Operation { + cppcoro::generator generator_; + std::vector getChildren() override { return {}; } + string getCacheKeyImpl() const override { return "CustomGeneratorOperation"; } + string getDescriptor() const override { + return "CustomGeneratorOperationDescriptor"; + } + size_t getResultWidth() const override { return 0; } + size_t getCostEstimate() override { return 0; } + uint64_t getSizeEstimateBeforeLimit() override { return 0; } + float getMultiplicity([[maybe_unused]] size_t) override { return 0; } + bool knownEmptyResult() override { return false; } + vector resultSortedOn() const 
override { return {}; } + VariableToColumnMap computeVariableToColumnMap() const override { return {}; } + + public: + CustomGeneratorOperation(QueryExecutionContext* context, + cppcoro::generator generator) + : Operation{context}, generator_{std::move(generator)} {} + ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { + return {std::move(generator_), resultSortedOn(), LocalVocab{}}; + } +}; + #endif // QLEVER_OPERATIONTESTHELPERS_H From ef0f8721f22996f281d2f4d641a7802ae688cb25 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 19 Aug 2024 02:27:22 +0200 Subject: [PATCH 125/133] Rename variable and fix functionality --- test/engine/ValuesForTesting.h | 46 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 15f7f1f152..73fc703402 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -15,7 +15,7 @@ // operation. class ValuesForTesting : public Operation { private: - std::vector table_; + std::vector tables_; std::vector> variables_; bool supportsLimit_; // Those can be manually overwritten for testing using the respective getters. 
@@ -33,7 +33,7 @@ class ValuesForTesting : public Operation { LocalVocab localVocab = LocalVocab{}, std::optional multiplicity = std::nullopt) : Operation{ctx}, - table_{}, + tables_{}, variables_{std::move(variables)}, supportsLimit_{supportsLimit}, sizeEstimate_{table.numRows()}, @@ -42,13 +42,13 @@ class ValuesForTesting : public Operation { localVocab_{std::move(localVocab)}, multiplicity_{multiplicity} { AD_CONTRACT_CHECK(variables_.size() == table.numColumns()); - table_.push_back(std::move(table)); + tables_.push_back(std::move(table)); } explicit ValuesForTesting(QueryExecutionContext* ctx, std::vector tables, std::vector> variables) : Operation{ctx}, - table_{std::move(tables)}, + tables_{std::move(tables)}, variables_{std::move(variables)}, supportsLimit_{false}, sizeEstimate_{0}, @@ -56,11 +56,12 @@ class ValuesForTesting : public Operation { resultSortedColumns_{}, localVocab_{LocalVocab{}}, multiplicity_{std::nullopt} { - AD_CONTRACT_CHECK(std::ranges::all_of(table_, [this](const IdTable& table) { - return variables_.size() == table.numColumns(); - })); + AD_CONTRACT_CHECK( + std::ranges::all_of(tables_, [this](const IdTable& table) { + return variables_.size() == table.numColumns(); + })); size_t totalRows = 0; - for (const IdTable& idTable : table_) { + for (const IdTable& idTable : tables_) { totalRows += idTable.numRows(); } sizeEstimate_ = totalRows; @@ -77,8 +78,8 @@ class ValuesForTesting : public Operation { // Not implemented yet AD_CORRECTNESS_CHECK(!supportsLimit_); std::vector clones; - clones.reserve(table_.size()); - for (const IdTable& idTable : table_) { + clones.reserve(tables_.size()); + for (const IdTable& idTable : tables_) { clones.push_back(idTable.clone()); } auto generator = [](auto idTables) -> cppcoro::generator { @@ -89,16 +90,16 @@ class ValuesForTesting : public Operation { return {std::move(generator), resultSortedOn(), localVocab_.clone()}; } std::optional optionalTable; - if (table_.size() > 1) { - IdTable 
aggregateTable{table_.at(0).numColumns(), - table_.at(0).getAllocator()}; - for (const IdTable& idTable : table_) { + if (tables_.size() > 1) { + IdTable aggregateTable{tables_.at(0).numColumns(), + tables_.at(0).getAllocator()}; + for (const IdTable& idTable : tables_) { aggregateTable.insertAtEnd(idTable); } optionalTable = std::move(aggregateTable); } auto table = optionalTable.has_value() ? std::move(optionalTable).value() - : table_.at(0).clone(); + : tables_.at(0).clone(); if (supportsLimit_) { table.erase(table.begin() + getLimit().upperBound(table.size()), table.end()); @@ -113,15 +114,15 @@ class ValuesForTesting : public Operation { // ___________________________________________________________________________ string getCacheKeyImpl() const override { std::stringstream str; - auto numRowsView = table_ | std::views::transform(&IdTable::numRows); + auto numRowsView = tables_ | std::views::transform(&IdTable::numRows); auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0); - auto numCols = table_.empty() ? 0 : table_.at(0).numColumns(); + auto numCols = tables_.empty() ? 0 : tables_.at(0).numColumns(); str << "Values for testing with " << numCols << " columns and " << totalNumRows << " rows. "; if (totalNumRows > 1000) { str << ad_utility::FastRandomIntGenerator{}(); } else { - for (const IdTable& idTable : table_) { + for (const IdTable& idTable : tables_) { for (size_t i = 0; i < idTable.numColumns(); ++i) { for (Id entry : idTable.getColumn(i)) { str << entry << ' '; @@ -139,7 +140,7 @@ class ValuesForTesting : public Operation { } size_t getResultWidth() const override { - return table_.empty() ? 0 : table_.at(0).numColumns(); + return tables_.empty() ? 
0 : tables_.at(0).numColumns(); } vector resultSortedOn() const override { @@ -162,7 +163,10 @@ class ValuesForTesting : public Operation { vector getChildren() override { return {}; } - bool knownEmptyResult() override { return table_.empty(); } + bool knownEmptyResult() override { + return std::ranges::all_of( + tables_, [](const IdTable& table) { return table.empty(); }); + } private: VariableToColumnMap computeVariableToColumnMap() const override { @@ -172,7 +176,7 @@ class ValuesForTesting : public Operation { continue; } bool containsUndef = - std::ranges::any_of(table_, [&i](const IdTable& table) { + std::ranges::any_of(tables_, [&i](const IdTable& table) { return std::ranges::any_of(table.getColumn(i), [](Id id) { return id.isUndefined(); }); }); From dff4910f447dec46f46e8fddf2ad82c5d25afc0b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 19 Aug 2024 21:15:25 +0200 Subject: [PATCH 126/133] Add unit tests for `Result` class --- test/CMakeLists.txt | 2 + test/ResultTest.cpp | 558 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 560 insertions(+) create mode 100644 test/ResultTest.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4da4ff119a..c75ffcd075 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -398,3 +398,5 @@ addLinkAndDiscoverTest(CopyableSynchronizationTest) addLinkAndDiscoverTest(CacheableGeneratorTest) addLinkAndDiscoverTest(FilterTest engine) + +addLinkAndDiscoverTest(ResultTest engine) diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp new file mode 100644 index 0000000000..8713e1f399 --- /dev/null +++ b/test/ResultTest.cpp @@ -0,0 +1,558 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Robin Textor-Falconi + +#include + +#include "engine/Result.h" +#include "util/IdTableHelpers.h" + +using namespace std::chrono_literals; + +namespace { +// Helper function to generate all possible splits of an IdTable in order to +// exhaustively test generator variants. +std::vector> getAllSubSplits( + const IdTable& idTable) { + std::vector> result; + for (size_t i = 0; i < std::pow(idTable.size() - 1, 2); ++i) { + std::vector reverseIndex{}; + size_t copy = i; + for (size_t index = 0; index < idTable.size(); ++index) { + if (copy % 2 == 1) { + reverseIndex.push_back(index); + } + copy /= 2; + } + result.push_back( + [](auto split, IdTable clone) -> cppcoro::generator { + IdTable subSplit{clone.numColumns(), + ad_utility::makeUnlimitedAllocator()}; + size_t splitIndex = 0; + for (size_t i = 0; i < clone.size(); ++i) { + subSplit.push_back(clone[i]); + if (splitIndex < split.size() && split[splitIndex] == i) { + co_yield subSplit; + subSplit.clear(); + ++splitIndex; + } + } + if (subSplit.size() > 0) { + co_yield subSplit; + } + }(std::move(reverseIndex), idTable.clone())); + } + + return result; +} + +// _____________________________________________________________________________ +void consumeGenerator(cppcoro::generator& generator) { + for ([[maybe_unused]] IdTable& _ : generator) { + } +} +} // namespace + +TEST(Result, verifyIdTableThrowsWhenActuallyLazy) { + Result result1{ + []() -> cppcoro::generator { co_return; }(), {}, LocalVocab{}}; + EXPECT_FALSE(result1.isFullyMaterialized()); + EXPECT_THROW(result1.idTable(), ad_utility::Exception); + + Result result2{[]() -> cppcoro::generator { co_return; }(), + {}, + result1.getSharedLocalVocab()}; + EXPECT_FALSE(result2.isFullyMaterialized()); + EXPECT_THROW(result2.idTable(), ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(Result, verifyIdTableThrowsOnSecondAccess) { + const Result result{ + []() -> cppcoro::generator { co_return; 
}(), {}, LocalVocab{}}; + // First access should work + for ([[maybe_unused]] IdTable& _ : result.idTables()) { + ADD_FAILURE() << "Generator is empty"; + } + // Now it should throw + EXPECT_THROW(result.idTables(), ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(Result, verifyIdTablesThrowsWhenFullyMaterialized) { + Result result1{ + IdTable{ad_utility::makeUnlimitedAllocator()}, {}, LocalVocab{}}; + EXPECT_TRUE(result1.isFullyMaterialized()); + EXPECT_THROW(result1.idTables(), ad_utility::Exception); + + Result result2{IdTable{ad_utility::makeUnlimitedAllocator()}, + {}, + result1.getSharedLocalVocab()}; + EXPECT_TRUE(result2.isFullyMaterialized()); + EXPECT_THROW(result2.idTables(), ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(Result, + verifyAssertSortOrderIsRespectedThrowsWhenNotSortedAndSucceedsWhenSorted) { + auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); + + EXPECT_NO_THROW((Result{idTable.clone(), {}, LocalVocab{}})); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {}, LocalVocab{}}; + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + + EXPECT_NO_THROW((Result{idTable.clone(), {0}, LocalVocab{}})); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {0}, LocalVocab{}}; + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + + EXPECT_NO_THROW((Result{idTable.clone(), {0, 1}, LocalVocab{}})); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {0, 1}, LocalVocab{}}; + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + + EXPECT_NO_THROW((Result{idTable.clone(), {2, 0}, LocalVocab{}})); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {2, 0}, LocalVocab{}}; + 
EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + (Result{idTable.clone(), {1}, LocalVocab{}}), + ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {1}, LocalVocab{}}; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("compareRowsByJoinColumns"), + ad_utility::Exception); + } + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + (Result{idTable.clone(), {1, 0}, LocalVocab{}}), + ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {1, 0}, LocalVocab{}}; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("compareRowsByJoinColumns"), + ad_utility::Exception); + } + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + (Result{idTable.clone(), {2, 1}, LocalVocab{}}), + ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {2, 1}, LocalVocab{}}; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("compareRowsByJoinColumns"), + ad_utility::Exception); + } +} + +// _____________________________________________________________________________ +TEST(Result, verifyRunOnNewChunkComputedThrowsWithFullyMaterializedResult) { + Result result{makeIdTableFromVector({{}}), {}, LocalVocab{}}; + + EXPECT_THROW( + result.runOnNewChunkComputed( + [](const IdTable&, std::chrono::microseconds) {}, [](bool) {}), + ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { + auto idTable1 = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}}); + auto idTable2 = 
makeIdTableFromVector({{3, 4, 0}}); + auto idTable3 = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); + + Result result{ + [](auto& t1, auto& t2, auto& t3) -> cppcoro::generator { + std::this_thread::sleep_for(1ms); + co_yield t1; + std::this_thread::sleep_for(3ms); + co_yield t2; + std::this_thread::sleep_for(5ms); + co_yield t3; + }(idTable1, idTable2, idTable3), + {}, + LocalVocab{}}; + uint32_t callCounter = 0; + bool finishedConsuming = false; + + result.runOnNewChunkComputed( + [&](const IdTable& idTable, std::chrono::microseconds duration) { + ++callCounter; + if (callCounter == 1) { + EXPECT_EQ(&idTable1, &idTable); + EXPECT_GE(duration, 1ms); + } else if (callCounter == 2) { + EXPECT_EQ(&idTable2, &idTable); + EXPECT_GE(duration, 3ms); + } else if (callCounter == 3) { + EXPECT_EQ(&idTable3, &idTable); + EXPECT_GE(duration, 5ms); + } + }, + [&](bool error) { + EXPECT_FALSE(error); + finishedConsuming = true; + }); + + consumeGenerator(result.idTables()); + + EXPECT_EQ(callCounter, 3); + EXPECT_TRUE(finishedConsuming); +} + +// _____________________________________________________________________________ +TEST(Result, verifyRunOnNewChunkCallsFinishOnError) { + Result result{ + []() -> cppcoro::generator { + throw std::runtime_error{"verifyRunOnNewChunkCallsFinishOnError"}; + co_return; + }(), + {}, + LocalVocab{}}; + uint32_t callCounterGenerator = 0; + uint32_t callCounterFinished = 0; + + result.runOnNewChunkComputed( + [&](const IdTable&, std::chrono::microseconds) { + ++callCounterGenerator; + }, + [&](bool error) { + EXPECT_TRUE(error); + ++callCounterFinished; + }); + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("verifyRunOnNewChunkCallsFinishOnError"), + std::runtime_error); + + EXPECT_EQ(callCounterGenerator, 0); + EXPECT_EQ(callCounterFinished, 1); +} + +// _____________________________________________________________________________ +TEST(Result, 
verifyRunOnNewChunkCallsFinishOnPartialConsumption) { + uint32_t callCounterGenerator = 0; + uint32_t callCounterFinished = 0; + + { + Result result{[](IdTable idTable) -> cppcoro::generator { + co_yield idTable; + }(makeIdTableFromVector({{}})), + {}, + LocalVocab{}}; + + result.runOnNewChunkComputed( + [&](const IdTable&, std::chrono::microseconds) { + ++callCounterGenerator; + }, + [&](bool error) { + EXPECT_FALSE(error); + ++callCounterFinished; + }); + + result.idTables().begin(); + } + + EXPECT_EQ(callCounterGenerator, 1); + EXPECT_EQ(callCounterFinished, 1); +} + +// _____________________________________________________________________________ +TEST(Result, verifyCacheDuringConsumptionThrowsWhenFullyMaterialized) { + Result result{makeIdTableFromVector({{}}), {}, LocalVocab{}}; + EXPECT_THROW( + result.cacheDuringConsumption( + [](const std::optional&, const IdTable&) { return true; }, + [](Result) {}), + ad_utility::Exception); +} + +// _____________________________________________________________________________ +TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { + auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + + // Test positive case + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {0}, LocalVocab{}}; + result.cacheDuringConsumption( + [predictedSize = 0](const std::optional& aggregator, + const IdTable& newTable) mutable { + if (aggregator.has_value()) { + EXPECT_EQ(aggregator.value().numColumns(), predictedSize); + } else { + EXPECT_EQ(predictedSize, 0); + } + predictedSize += newTable.numColumns(); + return true; + }, + [&](Result aggregatedResult) { + EXPECT_TRUE(aggregatedResult.isFullyMaterialized()); + EXPECT_EQ(aggregatedResult.idTable(), idTable); + EXPECT_EQ(aggregatedResult.sortedBy(), std::vector{0}); + }); + } + + // Test negative case + for (auto& generator : getAllSubSplits(idTable)) { + uint32_t callCounter = 0; + Result result{std::move(generator), {}, 
LocalVocab{}}; + result.cacheDuringConsumption( + [&](const std::optional& aggregator, const IdTable&) { + EXPECT_FALSE(aggregator.has_value()); + ++callCounter; + return false; + }, + [&](Result) { ++callCounter; }); + EXPECT_EQ(callCounter, 0); + } +} + +// _____________________________________________________________________________ +TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { + auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + LimitOffsetClause limitOffset{2, 1}; + { + uint32_t callCounter = 0; + Result result{idTable.clone(), {}, LocalVocab{}}; + result.applyLimitOffset( + limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { + // NOTE: duration can't be tested here, processors are too fast + auto comparisonTable = makeIdTableFromVector({{1, 6}, {2, 5}}); + EXPECT_EQ(innerTable, comparisonTable); + EXPECT_EQ(innerTable.numColumns(), 2); + EXPECT_EQ(innerTable.numRows(), 2); + ++callCounter; + }); + EXPECT_EQ(callCounter, 1); + } + + for (auto& generator : getAllSubSplits(idTable)) { + std::vector colSizes{}; + uint32_t totalRows = 0; + Result result{std::move(generator), {}, LocalVocab{}}; + result.applyLimitOffset( + limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { + // NOTE: duration can't be tested here, processors are too fast + for (const auto& row : innerTable) { + ASSERT_EQ(row.size(), 2); + EXPECT_NE(row[0].getVocabIndex().get(), 0); + EXPECT_NE(row[0].getVocabIndex().get(), 3); + EXPECT_NE(row[1].getVocabIndex().get(), 7); + EXPECT_NE(row[1].getVocabIndex().get(), 4); + } + totalRows += innerTable.size(); + colSizes.push_back(innerTable.numColumns()); + }); + + EXPECT_EQ(totalRows, 0); + EXPECT_TRUE(colSizes.empty()); + + consumeGenerator(result.idTables()); + + EXPECT_EQ(totalRows, 2); + EXPECT_THAT(colSizes, ::testing::Each(testing::Eq(2))); + } +} + +// _____________________________________________________________________________ +TEST(Result, 
verifyApplyLimitOffsetHandlesZeroLimitCorrectly) { + auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + LimitOffsetClause limitOffset{0, 1}; + { + uint32_t callCounter = 0; + Result result{idTable.clone(), {}, LocalVocab{}}; + result.applyLimitOffset( + limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { + EXPECT_EQ(innerTable.numRows(), 0); + ++callCounter; + }); + EXPECT_EQ(callCounter, 1); + } + + for (auto& generator : getAllSubSplits(idTable)) { + uint32_t callCounter = 0; + Result result{std::move(generator), {}, LocalVocab{}}; + result.applyLimitOffset( + limitOffset, + [&](std::chrono::microseconds, const IdTable&) { ++callCounter; }); + + consumeGenerator(result.idTables()); + + EXPECT_EQ(callCounter, 0); + } +} + +// _____________________________________________________________________________ +TEST(Result, verifyAssertThatLimitWasRespectedDoesThrowIfLimitWasNotRespected) { + auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + { + Result result{idTable.clone(), {}, LocalVocab{}}; + EXPECT_NO_THROW(result.assertThatLimitWasRespected({})); + EXPECT_NO_THROW(result.assertThatLimitWasRespected({4, 0})); + EXPECT_NO_THROW(result.assertThatLimitWasRespected({4, 1337})); + EXPECT_NO_THROW(result.assertThatLimitWasRespected({42, 0})); + EXPECT_NO_THROW(result.assertThatLimitWasRespected({42, 1337})); + EXPECT_THROW(result.assertThatLimitWasRespected({3, 0}), + ad_utility::Exception); + EXPECT_THROW(result.assertThatLimitWasRespected({3, 1}), + ad_utility::Exception); + EXPECT_THROW(result.assertThatLimitWasRespected({3, 2}), + ad_utility::Exception); + } + + auto createResultsAndApplyAssertion = [&](LimitOffsetClause limitOffset) { + std::vector results; + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.assertThatLimitWasRespected(limitOffset); + results.push_back(std::move(result)); + } + return results; + }; + + for (auto& 
result : createResultsAndApplyAssertion({})) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + for (auto& result : createResultsAndApplyAssertion({4, 0})) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + for (auto& result : createResultsAndApplyAssertion({4, 1337})) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + for (auto& result : createResultsAndApplyAssertion({42, 0})) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + for (auto& result : createResultsAndApplyAssertion({42, 1337})) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + for (auto& result : createResultsAndApplyAssertion({3, 0})) { + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } + for (auto& result : createResultsAndApplyAssertion({3, 1})) { + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } + for (auto& result : createResultsAndApplyAssertion({3, 2})) { + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } +} + +// _____________________________________________________________________________ +TEST(Result, + verifyCheckDefinednessDoesThrowIfColumnIsNotDefinedWhenClaimingItIs) { + auto correctTable1 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + auto correctTable2 = + makeIdTableFromVector({{0, Id::makeUndefined()}, {1, 6}, {2, 5}, {3, 4}}); + auto correctTable3 = + makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, Id::makeUndefined()}}); + auto correctTable4 = makeIdTableFromVector({{0, Id::makeUndefined()}, + {1, Id::makeUndefined()}, + {2, Id::makeUndefined()}, + {3, Id::makeUndefined()}}); + auto wrongTable1 = + makeIdTableFromVector({{Id::makeUndefined(), 7}, {1, 6}, {2, 5}, {3, 4}}); + auto wrongTable2 = makeIdTableFromVector({{Id::makeUndefined(), 7}, + {Id::makeUndefined(), 6}, + {Id::makeUndefined(), 5}, + {Id::makeUndefined(), 4}}); + auto wrongTable3 = + makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, 
{Id::makeUndefined(), 4}}); + VariableToColumnMap map{ + {Variable{"?a"}, {0, ColumnIndexAndTypeInfo::AlwaysDefined}}, + {Variable{"?b"}, {1, ColumnIndexAndTypeInfo::PossiblyUndefined}}}; + + { + Result result{correctTable1.clone(), {}, LocalVocab{}}; + EXPECT_NO_THROW(result.checkDefinedness(map)); + } + { + Result result{correctTable2.clone(), {}, LocalVocab{}}; + EXPECT_NO_THROW(result.checkDefinedness(map)); + } + { + Result result{correctTable3.clone(), {}, LocalVocab{}}; + EXPECT_NO_THROW(result.checkDefinedness(map)); + } + { + Result result{correctTable4.clone(), {}, LocalVocab{}}; + EXPECT_NO_THROW(result.checkDefinedness(map)); + } + { + Result result{wrongTable1.clone(), {}, LocalVocab{}}; + EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); + } + { + Result result{wrongTable2.clone(), {}, LocalVocab{}}; + EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); + } + { + Result result{wrongTable3.clone(), {}, LocalVocab{}}; + EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); + } + { + for (auto& generator : getAllSubSplits(correctTable1)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + } + { + for (auto& generator : getAllSubSplits(correctTable2)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + } + { + for (auto& generator : getAllSubSplits(correctTable3)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + } + { + for (auto& generator : getAllSubSplits(correctTable4)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } + } + { + for (auto& generator : getAllSubSplits(wrongTable1)) { + Result 
result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } + } + { + for (auto& generator : getAllSubSplits(wrongTable2)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } + } + { + for (auto& generator : getAllSubSplits(wrongTable3)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } + } +} From a43102135f3754020225b47e034aeae4006707d9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 20 Aug 2024 00:45:43 +0200 Subject: [PATCH 127/133] Add tests for more coverage and skip tests relying on expensive checks --- test/ConcurrentCacheTest.cpp | 59 ++++++++++++++++++++++++++++ test/ResultTest.cpp | 73 +++++++++++++++-------------------- test/engine/IndexScanTest.cpp | 17 ++++++++ 3 files changed, 107 insertions(+), 42 deletions(-) diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index 626b9b8bf9..7287dbe6ea 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -370,6 +370,65 @@ TEST(ConcurrentCache, isNotCachedIfUnsuitableWhenWaitingForPendingComputation) { EXPECT_THAT(result._resultPointer, Pointee("abc"s)); } +// _____________________________________________________________________________ +TEST(ConcurrentCache, isCachedIfSuitableWhenWaitingForPendingComputation) { + SimpleConcurrentLruCache cache{}; + + auto resultInProgress = std::make_shared< + ad_utility::ConcurrentCacheDetail::ResultInProgress>(); + + cache.clearAll(); + cache.getStorage().wlock()->_inProgress[0] = + std::pair(false, resultInProgress); + + std::atomic_bool finished = false; + + ad_utility::JThread thread{[&]() { + std::this_thread::sleep_for(5ms); + 
resultInProgress->finish(nullptr); + finished = true; + }}; + + auto result = cache.computeOnce( + 0, []() { return "abc"; }, false, [](const auto&) { return true; }); + + EXPECT_TRUE(finished); + EXPECT_EQ(cache.numNonPinnedEntries(), 1); + EXPECT_EQ(cache.numPinnedEntries(), 0); + EXPECT_THAT(result._resultPointer, Pointee("abc"s)); + EXPECT_TRUE(cache.cacheContains(0)); +} + +// _____________________________________________________________________________ +TEST(ConcurrentCache, + isCachedIfSuitableWhenWaitingForPendingComputationPinned) { + SimpleConcurrentLruCache cache{}; + + auto resultInProgress = std::make_shared< + ad_utility::ConcurrentCacheDetail::ResultInProgress>(); + + cache.clearAll(); + cache.getStorage().wlock()->_inProgress[0] = + std::pair(false, resultInProgress); + + std::atomic_bool finished = false; + + ad_utility::JThread thread{[&]() { + std::this_thread::sleep_for(5ms); + resultInProgress->finish(nullptr); + finished = true; + }}; + + auto result = cache.computeOncePinned( + 0, []() { return "abc"; }, false, [](const auto&) { return true; }); + + EXPECT_TRUE(finished); + EXPECT_EQ(cache.numNonPinnedEntries(), 0); + EXPECT_EQ(cache.numPinnedEntries(), 1); + EXPECT_THAT(result._resultPointer, Pointee("abc"s)); + EXPECT_TRUE(cache.cacheContains(0)); +} + // _____________________________________________________________________________ TEST(ConcurrentCache, ifUnsuitableForCacheAndPinnedThrowsException) { SimpleConcurrentLruCache cache{}; diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index 8713e1f399..e86fc3d2e1 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -457,6 +457,9 @@ TEST(Result, verifyAssertThatLimitWasRespectedDoesThrowIfLimitWasNotRespected) { // _____________________________________________________________________________ TEST(Result, verifyCheckDefinednessDoesThrowIfColumnIsNotDefinedWhenClaimingItIs) { + if constexpr (!ad_utility::areExpensiveChecksEnabled) { + GTEST_SKIP_("Expensive checks are disabled, 
skipping test."); + } auto correctTable1 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); auto correctTable2 = makeIdTableFromVector({{0, Id::makeUndefined()}, {1, 6}, {2, 5}, {3, 4}}); @@ -506,53 +509,39 @@ TEST(Result, Result result{wrongTable3.clone(), {}, LocalVocab{}}; EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); } - { - for (auto& generator : getAllSubSplits(correctTable1)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } + for (auto& generator : getAllSubSplits(correctTable1)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - { - for (auto& generator : getAllSubSplits(correctTable2)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } + for (auto& generator : getAllSubSplits(correctTable2)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - { - for (auto& generator : getAllSubSplits(correctTable3)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } + for (auto& generator : getAllSubSplits(correctTable3)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - { - for (auto& generator : getAllSubSplits(correctTable4)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } + for (auto& generator : getAllSubSplits(correctTable4)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + 
EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - { - for (auto& generator : getAllSubSplits(wrongTable1)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); - } + for (auto& generator : getAllSubSplits(wrongTable1)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); } - { - for (auto& generator : getAllSubSplits(wrongTable2)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); - } + for (auto& generator : getAllSubSplits(wrongTable2)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); } - { - for (auto& generator : getAllSubSplits(wrongTable3)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); - } + for (auto& generator : getAllSubSplits(wrongTable3)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.checkDefinedness(map); + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); } } diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 315f429a7d..2f3a89259c 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -485,3 +485,20 @@ TEST(IndexScan, computeResultCanBeConsumedLazily) { EXPECT_EQ(resultValues[2][0], p2); EXPECT_EQ(resultValues[2][1], s1); } + +// _____________________________________________________________________________ +TEST(IndexScan, computeResultReturnsEmptyGeneratorIfScanIsEmpty) { + using V = Variable; + using I = TripleComponent::Iri; + auto qec = getQec("

, . .", true, false); + SparqlTripleSimple scanTriple{V{"?x"}, I::fromIriref(""), V{"?z"}}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + + ProtoResult result = scan.computeResultOnlyForTesting(true); + + ASSERT_FALSE(result.isFullyMaterialized()); + + for ([[maybe_unused]] IdTable& idTable : result.idTables()) { + ADD_FAILURE() << "Generator should be empty" << std::endl; + } +} From 476a691c558f4603c9c9d87dc329b83784c4c8b9 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 20 Aug 2024 01:26:15 +0200 Subject: [PATCH 128/133] Add test for edge case and skip test on expensive checks disabled --- test/ResultTest.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index e86fc3d2e1..d8a2b4ef99 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -95,6 +95,9 @@ TEST(Result, verifyIdTablesThrowsWhenFullyMaterialized) { // _____________________________________________________________________________ TEST(Result, verifyAssertSortOrderIsRespectedThrowsWhenNotSortedAndSucceedsWhenSorted) { + if constexpr (!ad_utility::areExpensiveChecksEnabled) { + GTEST_SKIP_("Expensive checks are disabled, skipping test."); + } auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); EXPECT_NO_THROW((Result{idTable.clone(), {}, LocalVocab{}})); @@ -162,6 +165,38 @@ TEST(Result, } } +// _____________________________________________________________________________ +TEST(Result, + verifyAnErrorIsThrownIfSortedByHasHigherIndicesThanTheTableHasColumns) { + auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + (Result{idTable.clone(), {3}, LocalVocab{}}), + ::testing::HasSubstr("colIndex < idTable.numColumns()"), + ad_utility::Exception); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {3}, LocalVocab{}}; + 
AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("colIndex < idTable.numColumns()"), + ad_utility::Exception); + } + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + (Result{idTable.clone(), {2, 1337}, LocalVocab{}}), + ::testing::HasSubstr("colIndex < idTable.numColumns()"), + ad_utility::Exception); + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {2, 1337}, LocalVocab{}}; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + ::testing::HasSubstr("colIndex < idTable.numColumns()"), + ad_utility::Exception); + } +} + // _____________________________________________________________________________ TEST(Result, verifyRunOnNewChunkComputedThrowsWithFullyMaterializedResult) { Result result{makeIdTableFromVector({{}}), {}, LocalVocab{}}; From b8b03f92baec729ae289b68402649c373fb00517 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 20 Aug 2024 02:23:14 +0200 Subject: [PATCH 129/133] Fix sonarcloud issues --- src/engine/Operation.cpp | 2 +- src/engine/Operation.h | 3 ++- src/engine/Result.cpp | 19 +++++++------------ 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 6214bafcd3..19e3ee57e4 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -72,7 +72,7 @@ void Operation::recursivelySetTimeConstraint( // _____________________________________________________________________________ void Operation::updateRuntimeStats(bool applyToFilter, uint64_t numRows, uint64_t numCols, - std::chrono::milliseconds duration) { + std::chrono::milliseconds duration) const { auto& rti = applyToFilter || !externalFilterApplied_ ? 
runtimeInfo() : *runtimeInfo().children_.at(0); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index fe8b5cd8af..a386e63595 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -266,7 +266,8 @@ class Operation { // arguments, considering the possibility that the initial runtime information // was replaced by calling `RuntimeInformation::addLimitOffsetRow`. void updateRuntimeStats(bool applyToFilter, uint64_t numRows, - uint64_t numCols, std::chrono::milliseconds duration); + uint64_t numCols, + std::chrono::milliseconds duration) const; // Perform the expensive computation modeled by the subclass of this // `Operation`. The value provided by `computationMode` decides if lazy diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 454fad7fde..706c2484f8 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -72,7 +72,7 @@ Result::Result(cppcoro::generator idTables, [](auto idTables, auto sortedBy) -> cppcoro::generator { std::optional previousId = std::nullopt; for (IdTable& idTable : idTables) { - if (idTable.size() > 0) { + if (!idTable.empty()) { if (previousId.has_value()) { AD_EXPENSIVE_CHECK(!compareRowsByJoinColumns(sortedBy)( idTable.at(0), previousId.value())); @@ -125,10 +125,9 @@ void Result::applyLimitOffset( resizeIdTable(std::get(data_), limitOffset); limitTimeCallback(limitTimer.msecs(), idTable()); } else { - auto generator = - [](cppcoro::generator original, LimitOffsetClause limitOffset, - std::function - limitTimeCallback) -> cppcoro::generator { + auto generator = [](cppcoro::generator original, + LimitOffsetClause limitOffset, + auto limitTimeCallback) -> cppcoro::generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } @@ -213,12 +212,8 @@ void Result::runOnNewChunkComputed( std::function onNewChunk, std::function onGeneratorFinished) { AD_CONTRACT_CHECK(!isFullyMaterialized()); - auto generator = - [](cppcoro::generator original, - std::function - onNewChunk, - std::function 
onGeneratorFinished) - -> cppcoro::generator { + auto generator = [](cppcoro::generator original, auto onNewChunk, + auto onGeneratorFinished) -> cppcoro::generator { // Call this within destructor to make sure it is also called when an // operation stops iterating before reaching the end. absl::Cleanup cleanup{ @@ -236,7 +231,7 @@ void Result::runOnNewChunkComputed( throw; } }(std::move(idTables()), std::move(onNewChunk), - std::move(onGeneratorFinished)); + std::move(onGeneratorFinished)); data_.emplace(std::move(generator)); } From 1d83e866adf81269ff5f8fcc951aa18de0c2efa8 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:14:45 +0200 Subject: [PATCH 130/133] Adjust limit values for better coverage --- test/ResultTest.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index d8a2b4ef99..fe3b36731b 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -362,15 +362,16 @@ TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { // _____________________________________________________________________________ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { - auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); - LimitOffsetClause limitOffset{2, 1}; + auto idTable = + makeIdTableFromVector({{0, 9}, {1, 8}, {2, 7}, {3, 6}, {4, 5}}); + LimitOffsetClause limitOffset{2, 2}; { uint32_t callCounter = 0; Result result{idTable.clone(), {}, LocalVocab{}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { // NOTE: duration can't be tested here, processors are too fast - auto comparisonTable = makeIdTableFromVector({{1, 6}, {2, 5}}); + auto comparisonTable = makeIdTableFromVector({{2, 7}, {3, 6}}); EXPECT_EQ(innerTable, comparisonTable); EXPECT_EQ(innerTable.numColumns(), 2); EXPECT_EQ(innerTable.numRows(), 2); @@ -389,9 +390,11 @@ 
TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { for (const auto& row : innerTable) { ASSERT_EQ(row.size(), 2); EXPECT_NE(row[0].getVocabIndex().get(), 0); - EXPECT_NE(row[0].getVocabIndex().get(), 3); - EXPECT_NE(row[1].getVocabIndex().get(), 7); - EXPECT_NE(row[1].getVocabIndex().get(), 4); + EXPECT_NE(row[0].getVocabIndex().get(), 1); + EXPECT_NE(row[0].getVocabIndex().get(), 4); + EXPECT_NE(row[1].getVocabIndex().get(), 9); + EXPECT_NE(row[1].getVocabIndex().get(), 8); + EXPECT_NE(row[1].getVocabIndex().get(), 5); } totalRows += innerTable.size(); colSizes.push_back(innerTable.numColumns()); From 538814f5b1daa637e98b6c67bd252dbe7a7ef362 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:04:22 +0200 Subject: [PATCH 131/133] Address PR comments --- src/engine/Filter.cpp | 1 - src/engine/Operation.cpp | 47 +++++++++++--------------- src/engine/Operation.h | 21 ++++++++---- src/engine/Result.cpp | 9 +++-- src/engine/Result.h | 3 +- src/engine/RuntimeInformation.cpp | 29 +++++++++------- src/engine/RuntimeInformation.h | 9 ++--- test/OperationTest.cpp | 56 +++---------------------------- test/ResultTest.cpp | 12 +++---- 9 files changed, 72 insertions(+), 115 deletions(-) diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 9ede82df0f..2458d87fb0 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -59,7 +59,6 @@ ProtoResult Filter::computeResult(bool requestLaziness) { return {[](auto subRes, auto* self) -> cppcoro::generator { for (IdTable& idTable : subRes->idTables()) { IdTable result = self->filterIdTable(subRes, idTable); - LOG(DEBUG) << "Filter result chunk done." 
<< endl; co_yield result; } }(std::move(subRes), this), diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 19e3ee57e4..dc1ee4d97c 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -70,20 +70,22 @@ void Operation::recursivelySetTimeConstraint( } // _____________________________________________________________________________ -void Operation::updateRuntimeStats(bool applyToFilter, uint64_t numRows, +void Operation::updateRuntimeStats(bool applyToLimit, uint64_t numRows, uint64_t numCols, - std::chrono::milliseconds duration) const { - auto& rti = applyToFilter || !externalFilterApplied_ - ? runtimeInfo() - : *runtimeInfo().children_.at(0); + std::chrono::microseconds duration) const { + bool isRtiWrappedInLimit = !applyToLimit && externalLimitApplied_; + auto& rti = + isRtiWrappedInLimit ? *runtimeInfo().children_.at(0) : runtimeInfo(); rti.totalTime_ += duration; rti.originalTotalTime_ = rti.totalTime_; rti.originalOperationTime_ = rti.getOperationTime(); - if (!applyToFilter || externalFilterApplied_) { + // Don't update the number of rows/cols twice if the rti for the limit and the + // rti for the actual operation are the same. 
+ if (!applyToLimit || externalLimitApplied_) { rti.numRows_ += numRows; rti.numCols_ = numCols; } - if (!applyToFilter && externalFilterApplied_) { + if (isRtiWrappedInLimit) { runtimeInfo().totalTime_ += duration; runtimeInfo().originalTotalTime_ = runtimeInfo().totalTime_; runtimeInfo().originalOperationTime_ = runtimeInfo().getOperationTime(); @@ -124,18 +126,14 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, } else { runtimeInfo().status_ = RuntimeInformation::lazilyMaterialized; result.runOnNewChunkComputed( - [this, overlap = 0us, timeSizeUpdate = 0us]( + [this, timeSizeUpdate = 0us]( const IdTable& idTable, std::chrono::microseconds duration) mutable { - overlap += duration; - timeSizeUpdate += duration; - auto msPrecision = - std::chrono::duration_cast(overlap); updateRuntimeStats(false, idTable.numRows(), idTable.numColumns(), - msPrecision); - overlap -= msPrecision; + duration); LOG(DEBUG) << "Computed partial chunk of size " << idTable.numRows() << " x " << idTable.numColumns() << std::endl; + timeSizeUpdate += duration; if (timeSizeUpdate > 50ms) { timeSizeUpdate = 0us; signalQueryUpdate(); @@ -158,18 +156,13 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // limits and offsets. 
if (!supportsLimit()) { runtimeInfo().addLimitOffsetRow(_limit, true); - AD_CONTRACT_CHECK(!externalFilterApplied_); - externalFilterApplied_ = _limit._limit.has_value() || _limit._offset != 0; - result.applyLimitOffset( - _limit, [this, overlap = 0us](std::chrono::microseconds limitTime, - const IdTable& idTable) mutable { - overlap += limitTime; - auto msPrecision = - std::chrono::duration_cast(overlap); - updateRuntimeStats(true, idTable.numRows(), idTable.numColumns(), - msPrecision); - overlap -= msPrecision; - }); + AD_CONTRACT_CHECK(!externalLimitApplied_); + externalLimitApplied_ = _limit._limit.has_value() || _limit._offset != 0; + result.applyLimitOffset(_limit, [this](std::chrono::microseconds limitTime, + const IdTable& idTable) { + updateRuntimeStats(true, idTable.numRows(), idTable.numColumns(), + limitTime); + }); } else { result.assertThatLimitWasRespected(_limit); } @@ -378,7 +371,7 @@ void Operation::updateRuntimeInformationWhenOptimizedOut( auto timesOfChildren = _runtimeInfo->children_ | std::views::transform(&RuntimeInformation::totalTime_); _runtimeInfo->totalTime_ = - std::reduce(timesOfChildren.begin(), timesOfChildren.end(), 0ms); + std::reduce(timesOfChildren.begin(), timesOfChildren.end(), 0us); signalQueryUpdate(); } diff --git a/src/engine/Operation.h b/src/engine/Operation.h index a386e63595..7420f155f9 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -265,9 +265,14 @@ class Operation { // Update the runtime information of this operation according to the given // arguments, considering the possibility that the initial runtime information // was replaced by calling `RuntimeInformation::addLimitOffsetRow`. - void updateRuntimeStats(bool applyToFilter, uint64_t numRows, - uint64_t numCols, - std::chrono::milliseconds duration) const; + // `applyToLimit` indicates if the stats should be applied to the runtime + // information of the limit, or the runtime information of the actual + // operation. 
If `supportsLimit() == true`, then the operation does already + // track the limit stats correctly and there's no need to keep track of both. + // Otherwise `externalLimitApplied_` decides how stat tracking should be + // handled. + void updateRuntimeStats(bool applyToLimit, uint64_t numRows, uint64_t numCols, + std::chrono::microseconds duration) const; // Perform the expensive computation modeled by the subclass of this // `Operation`. The value provided by `computationMode` decides if lazy @@ -276,8 +281,8 @@ class Operation { ProtoResult runComputation(const ad_utility::Timer& timer, ComputationMode computationMode); - // Call `runComputationAndPrepareForCache` and transform it into a value that - // could be inserted into the cache. + // Call `runComputation` and transform it into a value that could be inserted + // into the cache. CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer, ComputationMode computationMode, const std::string& cacheKey, @@ -382,7 +387,11 @@ class Operation { // Store the list of columns by which the result is sorted. mutable std::optional> _resultSortedColumns = std::nullopt; - bool externalFilterApplied_ = false; + + // True if this operation does not support limits/offsets natively and a + // limit/offset is applied post computation. 
+ bool externalLimitApplied_ = false; + FRIEND_TEST(Operation, updateRuntimeStatsWorksCorrectly); FRIEND_TEST(Operation, verifyRuntimeInformationIsUpdatedForLazyOperations); FRIEND_TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 706c2484f8..f59971225b 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -37,7 +37,7 @@ auto Result::getMergedLocalVocab(const Result& result1, const Result& result2) LocalVocab Result::getCopyOfLocalVocab() const { return localVocab().clone(); } // _____________________________________________________________________________ -auto compareRowsByJoinColumns(const std::vector& sortedBy) { +auto compareRowsBySortColumns(const std::vector& sortedBy) { return [&sortedBy](const auto& row1, const auto& row2) { for (ColumnIndex col : sortedBy) { if (row1[col] != row2[col]) { @@ -74,7 +74,7 @@ Result::Result(cppcoro::generator idTables, for (IdTable& idTable : idTables) { if (!idTable.empty()) { if (previousId.has_value()) { - AD_EXPENSIVE_CHECK(!compareRowsByJoinColumns(sortedBy)( + AD_EXPENSIVE_CHECK(!compareRowsBySortColumns(sortedBy)( idTable.at(0), previousId.value())); } previousId = idTable.at(idTable.size() - 1); @@ -244,7 +244,7 @@ void Result::assertSortOrderIsRespected( })); AD_EXPENSIVE_CHECK( - std::ranges::is_sorted(idTable, compareRowsByJoinColumns(sortedBy))); + std::ranges::is_sorted(idTable, compareRowsBySortColumns(sortedBy))); } // _____________________________________________________________________________ @@ -257,8 +257,7 @@ const IdTable& Result::idTable() const { cppcoro::generator& Result::idTables() const { AD_CONTRACT_CHECK(!isFullyMaterialized()); const auto& container = std::get(data_); - AD_CONTRACT_CHECK(!container.consumed_); - container.consumed_ = true; + AD_CONTRACT_CHECK(!container.consumed_->exchange(true)); return container.generator_; } diff --git a/src/engine/Result.h b/src/engine/Result.h index 
ff68ec161e..2d896e364b 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -26,7 +26,8 @@ class Result { // Needs to be mutable in order to be consumable from a const result. struct GenContainer { mutable cppcoro::generator generator_; - mutable bool consumed_ = false; + mutable std::unique_ptr consumed_ = + std::make_unique(false); explicit GenContainer(cppcoro::generator generator) : generator_{std::move(generator)} {} }; diff --git a/src/engine/RuntimeInformation.cpp b/src/engine/RuntimeInformation.cpp index a8724f71a1..2e9abd05c1 100644 --- a/src/engine/RuntimeInformation.cpp +++ b/src/engine/RuntimeInformation.cpp @@ -36,6 +36,10 @@ std::string indentStr(size_t indent, bool stripped = false) { } return ind; } + +auto toMs(std::chrono::microseconds us) { + return std::chrono::duration_cast(us).count(); +} } // namespace // __________________________________________________________________________ @@ -67,9 +71,9 @@ void RuntimeInformation::writeToStream(std::ostream& out, size_t indent) const { << '\n'; out << indentStr(indent) << "columns: " << absl::StrJoin(columnNames_, ", ") << '\n'; - out << indentStr(indent) << "total_time: " << totalTime_.count() << " ms" + out << indentStr(indent) << "total_time: " << toMs(totalTime_) << " ms" << '\n'; - out << indentStr(indent) << "operation_time: " << getOperationTime().count() + out << indentStr(indent) << "operation_time: " << toMs(getOperationTime()) << " ms" << '\n'; out << indentStr(indent) << "status: " << toString(status_) << '\n'; out << indentStr(indent) @@ -77,11 +81,10 @@ void RuntimeInformation::writeToStream(std::ostream& out, size_t indent) const { if (cacheStatus_ != ad_utility::CacheStatus::computed) { out << indentStr(indent) // TODO use `<< originalTotalTime_` directly - << "original_total_time: " << originalTotalTime_.count() << " ms" - << '\n'; + << "original_total_time: " << toMs(originalTotalTime_) << " ms" << '\n'; out << indentStr(indent) - << "original_operation_time: " << 
originalOperationTime_.count() - << " ms" << '\n'; + << "original_operation_time: " << toMs(originalOperationTime_) << " ms" + << '\n'; } for (const auto& el : details_.items()) { out << indentStr(indent) << " " << el.key() << ": "; @@ -134,7 +137,7 @@ void RuntimeInformation::setColumnNames(const VariableToColumnMap& columnMap) { } // __________________________________________________________________________ -std::chrono::milliseconds RuntimeInformation::getOperationTime() const { +std::chrono::microseconds RuntimeInformation::getOperationTime() const { if (cacheStatus_ != ad_utility::CacheStatus::computed) { return totalTime_; } else { @@ -145,8 +148,8 @@ std::chrono::milliseconds RuntimeInformation::getOperationTime() const { children_ | std::views::transform(&RuntimeInformation::totalTime_); // Prevent "negative" computation times in case totalTime_ was not // computed for this yet. - return std::max(0ms, totalTime_ - std::reduce(timesOfChildren.begin(), - timesOfChildren.end(), 0ms)); + return std::max(0us, totalTime_ - std::reduce(timesOfChildren.begin(), + timesOfChildren.end(), 0us)); } } @@ -196,10 +199,10 @@ void to_json(nlohmann::ordered_json& j, const RuntimeInformation& rti) { {"result_rows", rti.numRows_}, {"result_cols", rti.numCols_}, {"column_names", rti.columnNames_}, - {"total_time", rti.totalTime_.count()}, - {"operation_time", rti.getOperationTime().count()}, - {"original_total_time", rti.originalTotalTime_.count()}, - {"original_operation_time", rti.originalOperationTime_.count()}, + {"total_time", toMs(rti.totalTime_)}, + {"operation_time", toMs(rti.getOperationTime())}, + {"original_total_time", toMs(rti.originalTotalTime_)}, + {"original_operation_time", toMs(rti.originalOperationTime_)}, {"cache_status", ad_utility::toString(rti.cacheStatus_)}, {"details", rti.details_}, {"estimated_total_cost", rti.costEstimate_}, diff --git a/src/engine/RuntimeInformation.h b/src/engine/RuntimeInformation.h index 1ba24d658b..ff4cdc1488 100644 --- 
a/src/engine/RuntimeInformation.h +++ b/src/engine/RuntimeInformation.h @@ -22,6 +22,7 @@ /// time to compute, status, etc.). Also contains the functionality to print /// that information nicely formatted and to export it to JSON. class RuntimeInformation { + using Microseconds = std::chrono::microseconds; using Milliseconds = std::chrono::milliseconds; public: @@ -47,12 +48,12 @@ class RuntimeInformation { /// The total time spent computing this operation. This includes the /// computation of the children. - Milliseconds totalTime_ = ZERO; + Microseconds totalTime_ = ZERO; /// In case this operation was read from the cache, we will store the time /// information about the original computation in the following two members. - Milliseconds originalTotalTime_ = ZERO; - Milliseconds originalOperationTime_ = ZERO; + Microseconds originalTotalTime_ = ZERO; + Microseconds originalOperationTime_ = ZERO; /// The estimated cost, size, and column multiplicities of the operation. size_t costEstimate_ = 0; @@ -100,7 +101,7 @@ class RuntimeInformation { /// Get the time spent computing the operation. This is the total time minus /// the time spent computing the children, but always positive. - [[nodiscard]] Milliseconds getOperationTime() const; + [[nodiscard]] Microseconds getOperationTime() const; /// Get the cost estimate for this operation. This is the total cost estimate /// minus the sum of the cost estimates of all children. 
diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index c45d34f40a..5a6e047b66 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -247,7 +247,7 @@ TEST(Operation, updateRuntimeStatsWorksCorrectly) { auto& rti = valuesForTesting.runtimeInfo(); // Test operation with built-in filter - valuesForTesting.externalFilterApplied_ = false; + valuesForTesting.externalLimitApplied_ = false; valuesForTesting.updateRuntimeStats(false, 11, 13, 17ms); EXPECT_EQ(rti.numCols_, 13); @@ -257,7 +257,7 @@ TEST(Operation, updateRuntimeStatsWorksCorrectly) { EXPECT_EQ(rti.originalOperationTime_, 17ms); // Test built-in filter - valuesForTesting.externalFilterApplied_ = false; + valuesForTesting.externalLimitApplied_ = false; valuesForTesting.updateRuntimeStats(true, 5, 3, 7ms); EXPECT_EQ(rti.numCols_, 13); @@ -274,7 +274,7 @@ TEST(Operation, updateRuntimeStatsWorksCorrectly) { auto& childRti = *rti.children_.at(0); // Test operation with external filter - valuesForTesting.externalFilterApplied_ = true; + valuesForTesting.externalLimitApplied_ = true; valuesForTesting.updateRuntimeStats(false, 31, 37, 41ms); EXPECT_EQ(rti.numCols_, 0); @@ -290,7 +290,7 @@ TEST(Operation, updateRuntimeStatsWorksCorrectly) { EXPECT_EQ(childRti.originalOperationTime_, 41ms); // Test external filter - valuesForTesting.externalFilterApplied_ = true; + valuesForTesting.externalLimitApplied_ = true; valuesForTesting.updateRuntimeStats(true, 19, 23, 29ms); EXPECT_EQ(rti.numCols_, 23); @@ -381,54 +381,6 @@ TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException) { EXPECT_TRUE(signaledUpdate); } -// _____________________________________________________________________________ -TEST(Operation, testSubMillisecondsIncrementsAreStillTracked) { -#ifdef _QLEVER_NO_TIMING_TESTS - GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); -#endif - auto idTable = makeIdTableFromVector({{}}); - CustomGeneratorOperation operation{ - getQec(), [](const IdTable& idTable) -> 
cppcoro::generator { - std::this_thread::sleep_for(300us); - co_yield idTable.clone(); - std::this_thread::sleep_for(300us); - co_yield idTable.clone(); - std::this_thread::sleep_for(500us); - co_yield idTable.clone(); - }(idTable)}; - - ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; - auto result = - operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); - - EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); - - auto& idTables = result.idTables(); - - auto iterator = idTables.begin(); - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(operation.runtimeInfo().totalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 0ms); - EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 0ms); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(operation.runtimeInfo().totalTime_, 1ms); - EXPECT_EQ(operation.runtimeInfo().originalTotalTime_, 1ms); - EXPECT_EQ(operation.runtimeInfo().originalOperationTime_, 1ms); - - ++iterator; - ASSERT_EQ(iterator, idTables.end()); -} - // _____________________________________________________________________________ TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) { #ifdef _QLEVER_NO_TIMING_TESTS diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index fe3b36731b..59966a6022 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -130,37 +130,37 @@ TEST(Result, AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( (Result{idTable.clone(), {1}, LocalVocab{}}), - ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + ::testing::HasSubstr("compareRowsBySortColumns"), 
ad_utility::Exception); for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {1}, LocalVocab{}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsByJoinColumns"), + ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); } AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( (Result{idTable.clone(), {1, 0}, LocalVocab{}}), - ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {1, 0}, LocalVocab{}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsByJoinColumns"), + ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); } AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( (Result{idTable.clone(), {2, 1}, LocalVocab{}}), - ::testing::HasSubstr("compareRowsByJoinColumns"), ad_utility::Exception); + ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {2, 1}, LocalVocab{}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsByJoinColumns"), + ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); } } From d7fcc988b249bbe66f906ba9e3c3a6aee4cf378c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 21 Aug 2024 02:54:33 +0200 Subject: [PATCH 132/133] Reduce code duplication in tests --- test/ConcurrentCacheTest.cpp | 50 +++-- test/ExportQueryExecutionTreesTest.cpp | 289 ++++++------------------- test/FilterTest.cpp | 89 ++++---- test/OperationTest.cpp | 154 +++++++------ test/ResultTest.cpp | 274 +++++++++-------------- test/engine/IndexScanTest.cpp | 22 +- test/util/OperationTestHelpers.h | 21 +- 7 
files changed, 341 insertions(+), 558 deletions(-) diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index 7287dbe6ea..0478a6a6e2 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -14,6 +14,7 @@ #include "util/Cache.h" #include "util/ConcurrentCache.h" #include "util/DefaultValueSizeGetter.h" +#include "util/GTestHelpers.h" #include "util/Timer.h" #include "util/jthread.h" @@ -81,6 +82,7 @@ namespace { auto returnTrue = [](const auto&) { return true; }; } // namespace +// _____________________________________________________________________________ TEST(ConcurrentCache, sequentialComputation) { SimpleConcurrentLruCache a{3ul}; ad_utility::Timer t{ad_utility::Timer::Started}; @@ -396,6 +398,7 @@ TEST(ConcurrentCache, isCachedIfSuitableWhenWaitingForPendingComputation) { EXPECT_EQ(cache.numNonPinnedEntries(), 1); EXPECT_EQ(cache.numPinnedEntries(), 0); EXPECT_THAT(result._resultPointer, Pointee("abc"s)); + EXPECT_EQ(result._cacheStatus, ad_utility::CacheStatus::computed); EXPECT_TRUE(cache.cacheContains(0)); } @@ -404,6 +407,8 @@ TEST(ConcurrentCache, isCachedIfSuitableWhenWaitingForPendingComputationPinned) { SimpleConcurrentLruCache cache{}; + // Simulate a computation with the same cache key that is currently in + // progress so the new computation waits for the result. 
auto resultInProgress = std::make_shared< ad_utility::ConcurrentCacheDetail::ResultInProgress>(); @@ -470,39 +475,52 @@ TEST(ConcurrentCache, // _____________________________________________________________________________ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { + auto hasValue = [](std::string value) { + using namespace ::testing; + using CS = SimpleConcurrentLruCache::ResultAndCacheStatus; + return Optional( + Field("_resultPointer", &CS::_resultPointer, Pointee(value))); + }; + SimpleConcurrentLruCache cache{}; + auto expectContainsSingleEntry = + [&](bool pinned, ad_utility::source_location l = + ad_utility::source_location::current()) { + using namespace ::testing; + auto trace = generateLocationTrace(l); + if (pinned) { + EXPECT_NE(cache.pinnedSize(), 0_B); + EXPECT_EQ(cache.nonPinnedSize(), 0_B); + } else { + EXPECT_EQ(cache.pinnedSize(), 0_B); + EXPECT_NE(cache.nonPinnedSize(), 0_B); + } + }; + cache.tryInsertIfNotPresent(false, 0, std::make_shared("abc")); auto value = cache.getIfContained(0); - ASSERT_NE(value, std::nullopt); - EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); - EXPECT_NE(cache.nonPinnedSize(), 0_B); - EXPECT_EQ(cache.pinnedSize(), 0_B); + EXPECT_THAT(value, hasValue("abc")); + expectContainsSingleEntry(false); cache.tryInsertIfNotPresent(false, 0, std::make_shared("def")); value = cache.getIfContained(0); - ASSERT_NE(value, std::nullopt); - EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); - EXPECT_NE(cache.nonPinnedSize(), 0_B); - EXPECT_EQ(cache.pinnedSize(), 0_B); + EXPECT_THAT(value, hasValue("abc")); + expectContainsSingleEntry(false); cache.tryInsertIfNotPresent(true, 0, std::make_shared("ghi")); value = cache.getIfContained(0); - ASSERT_NE(value, std::nullopt); - EXPECT_THAT(value.value()._resultPointer, Pointee("abc"s)); - EXPECT_EQ(cache.nonPinnedSize(), 0_B); - EXPECT_NE(cache.pinnedSize(), 0_B); + EXPECT_THAT(value, hasValue("abc"s)); + expectContainsSingleEntry(true); 
cache.clearAll(); cache.tryInsertIfNotPresent(true, 0, std::make_shared("jkl")); value = cache.getIfContained(0); - ASSERT_NE(value, std::nullopt); - EXPECT_THAT(value.value()._resultPointer, Pointee("jkl"s)); - EXPECT_EQ(cache.nonPinnedSize(), 0_B); - EXPECT_NE(cache.pinnedSize(), 0_B); + EXPECT_THAT(value, hasValue("jkl"s)); + expectContainsSingleEntry(true); } diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index aad14d4b1f..0128a0f460 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -9,10 +9,13 @@ #include "engine/QueryPlanner.h" #include "parser/SparqlParser.h" #include "util/GTestHelpers.h" +#include "util/IdTableHelpers.h" #include "util/IdTestHelpers.h" #include "util/IndexTestHelpers.h" using namespace std::string_literals; +using ::testing::ElementsAre; +using ::testing::Eq; using ::testing::HasSubstr; // Run the given SPARQL `query` on the given Turtle `kg` and export the result @@ -209,6 +212,18 @@ static std::string makeXMLHeader( // The end of a SPARQL XML export. 
static const std::string xmlTrailer = "\n\n"; +// Template is only required because inner class is not visible +template +std::vector convertToVector(cppcoro::generator generator) { + std::vector result; + for (const auto& [idTable, range] : generator) { + result.emplace_back(idTable.numColumns(), idTable.getAllocator()); + result.back().insertAtEnd(idTable.begin() + *range.begin(), + idTable.begin() + *(range.end() - 1) + 1); + } + return result; +} + // ____________________________________________________________________________ TEST(ExportQueryExecutionTrees, Integers) { std::string kg = @@ -1072,18 +1087,14 @@ INSTANTIATE_TEST_SUITE_P(StreamableMediaTypes, StreamableMediaTypesFixture, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { - IdTable idTable{1, ad_utility::makeUnlimitedAllocator()}; - idTable.push_back({Id::makeFromInt(42)}); - idTable.push_back({Id::makeFromInt(1337)}); + auto idTable = makeIdTableFromVector({{42}, {1337}}); Result result{std::move(idTable), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - ASSERT_EQ(iterator->size(), 2); - EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(42)); - EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(1337)); + EXPECT_EQ(*iterator, makeIdTableFromVector({{42}, {1337}})); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1092,18 +1103,11 @@ TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); - - co_yield 
std::move(idTable1); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; - IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; - idTable2.push_back({Id::makeFromInt(42)}); - idTable2.push_back({Id::makeFromInt(1337)}); - - co_yield std::move(idTable2); + IdTable idTable2 = makeIdTableFromVector({{42}, {1337}}); + co_yield idTable2; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; @@ -1111,16 +1115,11 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - ASSERT_EQ(iterator->size(), 3); - EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(1)); - EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(2)); - EXPECT_EQ(iterator->at(2)[0], Id::makeFromInt(3)); + ASSERT_EQ(*iterator, makeIdTableFromVector({{1}, {2}, {3}})); ++iterator; ASSERT_NE(iterator, generator.end()); - ASSERT_EQ(iterator->size(), 2); - EXPECT_EQ(iterator->at(0)[0], Id::makeFromInt(42)); - EXPECT_EQ(iterator->at(1)[0], Id::makeFromInt(1337)); + ASSERT_EQ(*iterator, makeIdTableFromVector({{42}, {1337}})); ++iterator; EXPECT_EQ(iterator, generator.end()); @@ -1129,244 +1128,110 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); - - co_yield std::move(idTable1); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, 
generator.end()); - - auto range = iterator->view_; - auto rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + auto referenceTable = makeIdTableFromVector({{2}}); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable)))); } // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; - co_yield std::move(idTable1); - - IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; - idTable2.push_back({Id::makeFromInt(4)}); - idTable2.push_back({Id::makeFromInt(5)}); - - co_yield std::move(idTable2); + IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); + co_yield idTable2; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); + auto referenceTable1 = makeIdTableFromVector({{4}, {5}}); - auto range = iterator->view_; - auto rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(5)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - 
EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable1)))); } // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenLastIsSkipped) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; - co_yield std::move(idTable1); - - IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; - idTable2.push_back({Id::makeFromInt(4)}); - idTable2.push_back({Id::makeFromInt(5)}); - - co_yield std::move(idTable2); + IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); + co_yield idTable2; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); + auto referenceTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - auto range = iterator->view_; - auto rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(1)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable1)))); } // _____________________________________________________________________________ 
TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); - - co_yield std::move(idTable1); - - IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; - idTable2.push_back({Id::makeFromInt(4)}); - idTable2.push_back({Id::makeFromInt(5)}); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; - co_yield std::move(idTable2); + IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); + co_yield idTable2; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - - auto range = iterator->view_; - auto rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(2)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); - - ++rangeIterator; - ASSERT_EQ(rangeIterator, range.end()); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - - range = iterator->view_; - rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); + auto referenceTable1 = makeIdTableFromVector({{2}, {3}}); + auto referenceTable2 = makeIdTableFromVector({{4}}); - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable1)), + Eq(std::cref(referenceTable2)))); } // 
_____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial) { auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - idTable1.push_back({Id::makeFromInt(2)}); - idTable1.push_back({Id::makeFromInt(3)}); - - co_yield std::move(idTable1); + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + co_yield idTable1; - IdTable idTable2{1, ad_utility::makeUnlimitedAllocator()}; - idTable2.push_back({Id::makeFromInt(4)}); - idTable2.push_back({Id::makeFromInt(5)}); + IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); + co_yield idTable2; - co_yield std::move(idTable2); - - IdTable idTable3{1, ad_utility::makeUnlimitedAllocator()}; - idTable3.push_back({Id::makeFromInt(6)}); - idTable3.push_back({Id::makeFromInt(7)}); - idTable3.push_back({Id::makeFromInt(8)}); - idTable3.push_back({Id::makeFromInt(9)}); - - co_yield std::move(idTable3); + IdTable idTable3 = makeIdTableFromVector({{6}, {7}, {8}, {9}}); + co_yield idTable3; }(); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - - auto range = iterator->view_; - auto rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(3)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - ASSERT_NE(iterator, generator.end()); + auto referenceTable1 = makeIdTableFromVector({{3}}); + auto referenceTable2 = makeIdTableFromVector({{4}, {5}}); + auto referenceTable3 = makeIdTableFromVector({{6}, {7}}); - range = iterator->view_; - rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - 
EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(4)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(5)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - - range = iterator->view_; - rangeIterator = range.begin(); - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(6)); - - ++rangeIterator; - ASSERT_NE(rangeIterator, range.end()); - EXPECT_EQ(iterator->idTable_.at(*rangeIterator)[0], Id::makeFromInt(7)); - - ++rangeIterator; - EXPECT_EQ(rangeIterator, range.end()); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable1)), + Eq(std::cref(referenceTable2)), + Eq(std::cref(referenceTable3)))); } // _____________________________________________________________________________ @@ -1381,18 +1246,13 @@ TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) { Result result{std::move(throwingGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 0, ._offset = 0}, result); - EXPECT_NO_THROW({ - for ([[maybe_unused]] const auto& info : generator) { - } - }); + EXPECT_NO_THROW(convertToVector(std::move(generator))); } { auto throwAfterYieldGenerator = []() -> cppcoro::generator { - IdTable idTable1{1, ad_utility::makeUnlimitedAllocator()}; - idTable1.push_back({Id::makeFromInt(1)}); - - co_yield std::move(idTable1); + IdTable idTable1 = makeIdTableFromVector({{1}}); + co_yield idTable1; ADD_FAILURE() << "Generator was resumed" << std::endl; throw std::runtime_error("Generator was resumed"); @@ -1401,16 +1261,9 @@ TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) { Result result{std::move(throwAfterYieldGenerator), {}, LocalVocab{}}; auto 
generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 0}, result); - bool executed = false; - EXPECT_NO_THROW({ - for (const auto& [idTable, range] : generator) { - for (uint64_t i : range) { - executed = true; - EXPECT_EQ(idTable.at(i)[0], Id::makeFromInt(1)); - } - } - }); - - EXPECT_TRUE(executed); + IdTable referenceTable1 = makeIdTableFromVector({{1}}); + std::vector tables; + EXPECT_NO_THROW({ tables = convertToVector(std::move(generator)); }); + EXPECT_THAT(tables, ElementsAre(Eq(std::cref(referenceTable1)))); } } diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 5fff02c8cf..38b1370aa1 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -7,34 +7,38 @@ #include "engine/Filter.h" #include "engine/ValuesForTesting.h" #include "engine/sparqlExpressions/LiteralExpression.h" +#include "util/IdTableHelpers.h" #include "util/IndexTestHelpers.h" using ::testing::ElementsAre; +using ::testing::Eq; -IdTable makeIdTable(std::vector bools) { - IdTable idTable{1, ad_utility::makeUnlimitedAllocator()}; - for (bool b : bools) { - idTable.push_back({Id::makeFromBool(b)}); - } - return idTable; -} +namespace { +// Shorthand for makeFromBool +ValueId asBool(bool value) { return Id::makeFromBool(value); } -columnBasedIdTable::Row makeRow(bool b) { - columnBasedIdTable::Row row{1}; - row[0] = Id::makeFromBool(b); - return row; +// Convert a generator to a vector for easier comparison in assertions +std::vector toVector(cppcoro::generator generator) { + std::vector result; + for (auto& table : generator) { + result.push_back(std::move(table)); + } + return result; } +} // namespace // _____________________________________________________________________________ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { QueryExecutionContext* qec = ad_utility::testing::getQec(); qec->getQueryTreeCache().clearAll(); std::vector idTables; - idTables.push_back(makeIdTable({true, true, false, false, true})); - 
idTables.push_back(makeIdTable({true, false})); - idTables.push_back(makeIdTable({})); - idTables.push_back(makeIdTable({false, false, false})); - idTables.push_back(makeIdTable({true})); + idTables.push_back(makeIdTableFromVector( + {{true}, {true}, {false}, {false}, {true}}, asBool)); + idTables.push_back(makeIdTableFromVector({{true}, {false}}, asBool)); + idTables.push_back(IdTable{1, ad_utility::makeUnlimitedAllocator()}); + idTables.push_back( + makeIdTableFromVector({{false}, {false}, {false}}, asBool)); + idTables.push_back(makeIdTableFromVector({{true}}, asBool)); ValuesForTesting values{qec, std::move(idTables), {Variable{"?x"}}}; QueryExecutionTree subTree{ @@ -49,29 +53,17 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnLazyEvaluation) { ASSERT_FALSE(result->isFullyMaterialized()); auto& generator = result->idTables(); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - EXPECT_THAT(*iterator, - ElementsAre(makeRow(true), makeRow(true), makeRow(true))); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_THAT(*iterator, ElementsAre(makeRow(true))); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_THAT(*iterator, ElementsAre()); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_THAT(*iterator, ElementsAre()); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - EXPECT_THAT(*iterator, ElementsAre(makeRow(true))); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + auto referenceTable1 = + makeIdTableFromVector({{true}, {true}, {true}}, asBool); + auto referenceTable2 = makeIdTableFromVector({{true}}, asBool); + IdTable referenceTable3{0, ad_utility::makeUnlimitedAllocator()}; + + EXPECT_THAT(toVector(std::move(generator)), + ElementsAre(Eq(std::cref(referenceTable1)), + Eq(std::cref(referenceTable2)), + Eq(std::cref(referenceTable3)), + Eq(std::cref(referenceTable3)), + Eq(std::cref(referenceTable2)))); } // 
_____________________________________________________________________________ @@ -79,11 +71,13 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { QueryExecutionContext* qec = ad_utility::testing::getQec(); qec->getQueryTreeCache().clearAll(); std::vector idTables; - idTables.push_back(makeIdTable({true, true, false, false, true})); - idTables.push_back(makeIdTable({true, false})); - idTables.push_back(makeIdTable({})); - idTables.push_back(makeIdTable({false, false, false})); - idTables.push_back(makeIdTable({true})); + idTables.push_back(makeIdTableFromVector( + {{true}, {true}, {false}, {false}, {true}}, asBool)); + idTables.push_back(makeIdTableFromVector({{true}, {false}}, asBool)); + idTables.push_back(IdTable{1, ad_utility::makeUnlimitedAllocator()}); + idTables.push_back( + makeIdTableFromVector({{false}, {false}, {false}}, asBool)); + idTables.push_back(makeIdTableFromVector({{true}}, asBool)); ValuesForTesting values{qec, std::move(idTables), {Variable{"?x"}}}; QueryExecutionTree subTree{ @@ -96,7 +90,8 @@ TEST(Filter, verifyPredicateIsAppliedCorrectlyOnNonLazyEvaluation) { auto result = filter.getResult(false, ComputationMode::FULLY_MATERIALIZED); ASSERT_TRUE(result->isFullyMaterialized()); - EXPECT_THAT(result->idTable(), - ElementsAre(makeRow(true), makeRow(true), makeRow(true), - makeRow(true), makeRow(true))); + + EXPECT_EQ( + result->idTable(), + makeIdTableFromVector({{true}, {true}, {true}, {true}, {true}}, asBool)); } diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 5a6e047b66..820fcd3913 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -16,6 +16,23 @@ using ad_utility::CancellationException; using ad_utility::CancellationHandle; using ad_utility::CancellationState; +namespace { +// Helper function to perform actions at various stages of a generator +template +auto expectAtEachStageOfGenerator( + cppcoro::generator generator, + std::vector> functions, + ad_utility::source_location l = 
ad_utility::source_location::current()) { + auto locationTrace = generateLocationTrace(l); + size_t index = 0; + for ([[maybe_unused]] T& _ : generator) { + functions.at(index)(); + ++index; + } + EXPECT_EQ(index, functions.size()); +} +} // namespace + // ________________________________________________ TEST(OperationTest, limitIsRepresentedInCacheKey) { NeutralElementOperation n{getQec()}; @@ -323,37 +340,35 @@ TEST(Operation, verifyRuntimeInformationIsUpdatedForLazyOperations) { auto result = valuesForTesting.runComputation( timer, ComputationMode::LAZY_IF_SUPPORTED); - EXPECT_EQ(valuesForTesting.runtimeInfo().status_, - RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(valuesForTesting.runtimeInfo().totalTime_, 0ms); - EXPECT_EQ(valuesForTesting.runtimeInfo().originalTotalTime_, 0ms); - EXPECT_EQ(valuesForTesting.runtimeInfo().originalOperationTime_, 0ms); + auto& rti = valuesForTesting.runtimeInfo(); + + EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(rti.totalTime_, 0ms); + EXPECT_EQ(rti.originalTotalTime_, 0ms); + EXPECT_EQ(rti.originalOperationTime_, 0ms); auto& idTables = result.idTables(); auto iterator = idTables.begin(); ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(valuesForTesting.runtimeInfo().status_, - RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); + EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 1); ++iterator; ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(valuesForTesting.runtimeInfo().status_, - RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 2); + EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 2); ++iterator; 
ASSERT_EQ(iterator, idTables.end()); - EXPECT_EQ(valuesForTesting.runtimeInfo().status_, - RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 2); + EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 2); } // _____________________________________________________________________________ @@ -366,7 +381,7 @@ TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException) { QueryExecutionContext context{ index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), SortPerformanceEstimator{}, [&](std::string) { signaledUpdate = true; }}; - AlwaysFailLazyOperation operation{&context}; + AlwaysFailOperation operation{&context}; ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; auto result = operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); @@ -416,26 +431,14 @@ TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) { EXPECT_EQ(updateCallCounter, 1); - auto& idTables = result.idTables(); - - auto iterator = idTables.begin(); - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(updateCallCounter, 2); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(updateCallCounter, 2); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(updateCallCounter, 3); + expectAtEachStageOfGenerator(std::move(result.idTables()), + { + [&]() { EXPECT_EQ(updateCallCounter, 2); }, + [&]() { EXPECT_EQ(updateCallCounter, 2); }, + [&]() { EXPECT_EQ(updateCallCounter, 3); }, + [&]() { EXPECT_EQ(updateCallCounter, 3); }, + }); - ++iterator; - ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(updateCallCounter, 3); - - ++iterator; - ASSERT_EQ(iterator, idTables.end()); EXPECT_EQ(updateCallCounter, 4); } @@ -489,36 +492,32 @@ TEST(Operation, verifyLimitIsProperlyAppliedAndUpdatesRuntimeInfoCorrectly) { auto result = 
valuesForTesting.runComputation( timer, ComputationMode::LAZY_IF_SUPPORTED); - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 0); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 0); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 0); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 0); - - auto& idTables = result.idTables(); - - auto iterator = idTables.begin(); - ASSERT_NE(iterator, idTables.end()); - - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 0); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 1); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 3); - - ++iterator; - ASSERT_EQ(iterator, idTables.end()); + auto& rti = valuesForTesting.runtimeInfo(); + auto& childRti = *rti.children_.at(0); - EXPECT_EQ(valuesForTesting.runtimeInfo().numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().numRows_, 1); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numCols_, 2); - EXPECT_EQ(valuesForTesting.runtimeInfo().children_.at(0)->numRows_, 3); + EXPECT_EQ(rti.numCols_, 0); + EXPECT_EQ(rti.numRows_, 0); + EXPECT_EQ(childRti.numCols_, 0); + EXPECT_EQ(childRti.numRows_, 0); + + expectAtEachStageOfGenerator(std::move(result.idTables()), + {[&]() { + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 0); + EXPECT_EQ(childRti.numCols_, 2); + EXPECT_EQ(childRti.numRows_, 1); + }, + [&]() { + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 1); + EXPECT_EQ(childRti.numCols_, 2); + EXPECT_EQ(childRti.numRows_, 3); + }}); + + EXPECT_EQ(rti.numCols_, 2); + EXPECT_EQ(rti.numRows_, 1); + 
EXPECT_EQ(childRti.numCols_, 2); + EXPECT_EQ(childRti.numRows_, 3); } // _____________________________________________________________________________ @@ -544,18 +543,17 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { ASSERT_TRUE(aggregatedValue.has_value()); ASSERT_TRUE(aggregatedValue.value()._resultPointer); - auto newRuntimeInfo = aggregatedValue.value()._resultPointer->runtimeInfo(); - auto& oldRuntimeInfo = valuesForTesting.runtimeInfo(); - EXPECT_EQ(newRuntimeInfo.descriptor_, oldRuntimeInfo.descriptor_); - EXPECT_EQ(newRuntimeInfo.numCols_, oldRuntimeInfo.numCols_); - EXPECT_EQ(newRuntimeInfo.numRows_, oldRuntimeInfo.numRows_); - EXPECT_EQ(newRuntimeInfo.totalTime_, oldRuntimeInfo.totalTime_); - EXPECT_EQ(newRuntimeInfo.originalTotalTime_, - oldRuntimeInfo.originalTotalTime_); - EXPECT_EQ(newRuntimeInfo.originalOperationTime_, - oldRuntimeInfo.originalOperationTime_); - EXPECT_EQ(newRuntimeInfo.status_, - RuntimeInformation::Status::fullyMaterialized); + + auto newRti = aggregatedValue.value()._resultPointer->runtimeInfo(); + auto& oldRti = valuesForTesting.runtimeInfo(); + + EXPECT_EQ(newRti.descriptor_, oldRti.descriptor_); + EXPECT_EQ(newRti.numCols_, oldRti.numCols_); + EXPECT_EQ(newRti.numRows_, oldRti.numRows_); + EXPECT_EQ(newRti.totalTime_, oldRti.totalTime_); + EXPECT_EQ(newRti.originalTotalTime_, oldRti.originalTotalTime_); + EXPECT_EQ(newRti.originalOperationTime_, oldRti.originalOperationTime_); + EXPECT_EQ(newRti.status_, RuntimeInformation::Status::fullyMaterialized); const auto& aggregatedResult = aggregatedValue.value()._resultPointer->resultTable(); diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index 59966a6022..5d2c733e7a 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -93,59 +93,36 @@ TEST(Result, verifyIdTablesThrowsWhenFullyMaterialized) { } // _____________________________________________________________________________ -TEST(Result, - 
verifyAssertSortOrderIsRespectedThrowsWhenNotSortedAndSucceedsWhenSorted) { +using CIs = std::vector; +class ResultSortTestS : public testing::TestWithParam {}; +class ResultSortTestF : public testing::TestWithParam {}; + +TEST_P(ResultSortTestS, verifyAssertSortOrderIsRespectedSucceedsWhenSorted) { if constexpr (!ad_utility::areExpensiveChecksEnabled) { GTEST_SKIP_("Expensive checks are disabled, skipping test."); } auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); - EXPECT_NO_THROW((Result{idTable.clone(), {}, LocalVocab{}})); - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), GetParam(), LocalVocab{}}; EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - EXPECT_NO_THROW((Result{idTable.clone(), {0}, LocalVocab{}})); - - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {0}, LocalVocab{}}; - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - - EXPECT_NO_THROW((Result{idTable.clone(), {0, 1}, LocalVocab{}})); - - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {0, 1}, LocalVocab{}}; - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - - EXPECT_NO_THROW((Result{idTable.clone(), {2, 0}, LocalVocab{}})); - - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {2, 0}, LocalVocab{}}; - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } + EXPECT_NO_THROW((Result{std::move(idTable), GetParam(), LocalVocab{}})); +} - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{idTable.clone(), {1}, LocalVocab{}}), - ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultSortTestS, + testing::Values(CIs{}, CIs{0}, CIs{0, 1}, CIs{2, 0})); - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {1}, LocalVocab{}}; - 
AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsBySortColumns"), - ad_utility::Exception); +// _____________________________________________________________________________ +TEST_P(ResultSortTestF, verifyAssertSortOrderIsRespectedThrowsWhenNotSorted) { + if constexpr (!ad_utility::areExpensiveChecksEnabled) { + GTEST_SKIP_("Expensive checks are disabled, skipping test."); } - - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{idTable.clone(), {1, 0}, LocalVocab{}}), - ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); + auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {1, 0}, LocalVocab{}}; + Result result{std::move(generator), GetParam(), LocalVocab{}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( consumeGenerator(result.idTables()), ::testing::HasSubstr("compareRowsBySortColumns"), @@ -153,47 +130,36 @@ TEST(Result, } AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{idTable.clone(), {2, 1}, LocalVocab{}}), + (Result{std::move(idTable), GetParam(), LocalVocab{}}), ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); - - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {2, 1}, LocalVocab{}}; - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsBySortColumns"), - ad_utility::Exception); - } } +INSTANTIATE_TEST_SUITE_P(FailureCases, ResultSortTestF, + testing::Values(CIs{1}, CIs{1, 0}, CIs{2, 1})); + // _____________________________________________________________________________ TEST(Result, verifyAnErrorIsThrownIfSortedByHasHigherIndicesThanTheTableHasColumns) { auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); + using ad_utility::Exception; + auto matcher = ::testing::HasSubstr("colIndex < idTable.numColumns()"); 
AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{idTable.clone(), {3}, LocalVocab{}}), - ::testing::HasSubstr("colIndex < idTable.numColumns()"), - ad_utility::Exception); + (Result{idTable.clone(), {3}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {3}, LocalVocab{}}; - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - consumeGenerator(result.idTables()), - ::testing::HasSubstr("colIndex < idTable.numColumns()"), - ad_utility::Exception); + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), + matcher, Exception); } AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{idTable.clone(), {2, 1337}, LocalVocab{}}), - ::testing::HasSubstr("colIndex < idTable.numColumns()"), - ad_utility::Exception); + (Result{idTable.clone(), {2, 1337}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {2, 1337}, LocalVocab{}}; - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - consumeGenerator(result.idTables()), - ::testing::HasSubstr("colIndex < idTable.numColumns()"), - ad_utility::Exception); + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), + matcher, Exception); } } @@ -373,8 +339,6 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { // NOTE: duration can't be tested here, processors are too fast auto comparisonTable = makeIdTableFromVector({{2, 7}, {3, 6}}); EXPECT_EQ(innerTable, comparisonTable); - EXPECT_EQ(innerTable.numColumns(), 2); - EXPECT_EQ(innerTable.numRows(), 2); ++callCounter; }); EXPECT_EQ(callCounter, 1); @@ -389,6 +353,8 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { // NOTE: duration can't be tested here, processors are too fast for (const auto& row : innerTable) { ASSERT_EQ(row.size(), 2); + // Make sure we never get values that were supposed to be filtered + // out. 
EXPECT_NE(row[0].getVocabIndex().get(), 0); EXPECT_NE(row[0].getVocabIndex().get(), 1); EXPECT_NE(row[0].getVocabIndex().get(), 4); @@ -439,147 +405,107 @@ TEST(Result, verifyApplyLimitOffsetHandlesZeroLimitCorrectly) { } // _____________________________________________________________________________ -TEST(Result, verifyAssertThatLimitWasRespectedDoesThrowIfLimitWasNotRespected) { +using LIC = LimitOffsetClause; +class ResultLimitTestS : public testing::TestWithParam {}; +class ResultLimitTestF : public testing::TestWithParam {}; + +TEST_P(ResultLimitTestS, + verifyAssertThatLimitWasRespectedDoesNotThrowIfLimitWasRespected) { auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); { Result result{idTable.clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.assertThatLimitWasRespected({})); - EXPECT_NO_THROW(result.assertThatLimitWasRespected({4, 0})); - EXPECT_NO_THROW(result.assertThatLimitWasRespected({4, 1337})); - EXPECT_NO_THROW(result.assertThatLimitWasRespected({42, 0})); - EXPECT_NO_THROW(result.assertThatLimitWasRespected({42, 1337})); - EXPECT_THROW(result.assertThatLimitWasRespected({3, 0}), - ad_utility::Exception); - EXPECT_THROW(result.assertThatLimitWasRespected({3, 1}), - ad_utility::Exception); - EXPECT_THROW(result.assertThatLimitWasRespected({3, 2}), - ad_utility::Exception); + EXPECT_NO_THROW(result.assertThatLimitWasRespected(GetParam())); } - auto createResultsAndApplyAssertion = [&](LimitOffsetClause limitOffset) { - std::vector results; - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.assertThatLimitWasRespected(limitOffset); - results.push_back(std::move(result)); - } - return results; - }; - - for (auto& result : createResultsAndApplyAssertion({})) { - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& result : createResultsAndApplyAssertion({4, 0})) { - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& result : 
createResultsAndApplyAssertion({4, 1337})) { - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& result : createResultsAndApplyAssertion({42, 0})) { - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& result : createResultsAndApplyAssertion({42, 1337})) { + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.assertThatLimitWasRespected(GetParam()); EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - for (auto& result : createResultsAndApplyAssertion({3, 0})) { - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); - } - for (auto& result : createResultsAndApplyAssertion({3, 1})) { - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); +} + +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultLimitTestS, + testing::Values(LIC{}, LIC{4, 0}, LIC{4, 1337}, + LIC{42, 0}, LIC{42, 1337})); + +// _____________________________________________________________________________ +TEST_P(ResultLimitTestF, + verifyAssertThatLimitWasRespectedDoesThrowIfLimitWasNotRespected) { + auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); + { + Result result{idTable.clone(), {}, LocalVocab{}}; + EXPECT_THROW(result.assertThatLimitWasRespected(GetParam()), + ad_utility::Exception); } - for (auto& result : createResultsAndApplyAssertion({3, 2})) { + + for (auto& generator : getAllSubSplits(idTable)) { + Result result{std::move(generator), {}, LocalVocab{}}; + result.assertThatLimitWasRespected(GetParam()); EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); } } +INSTANTIATE_TEST_SUITE_P(FailureCases, ResultLimitTestF, + testing::Values(LIC{3, 0}, LIC{3, 1}, LIC{3, 2})); + // _____________________________________________________________________________ -TEST(Result, - verifyCheckDefinednessDoesThrowIfColumnIsNotDefinedWhenClaimingItIs) { +class ResultDefinednessTestS : public testing::TestWithParam {}; 
+class ResultDefinednessTestF : public testing::TestWithParam {}; + +auto u = Id::makeUndefined(); +auto correctTable1 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); +auto correctTable2 = makeIdTableFromVector({{0, u}, {1, 6}, {2, 5}, {3, 4}}); +auto correctTable3 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, u}}); +auto correctTable4 = makeIdTableFromVector({{0, u}, {1, u}, {2, u}, {3, u}}); +auto wrongTable1 = makeIdTableFromVector({{u, 7}, {1, 6}, {2, 5}, {3, 4}}); +auto wrongTable2 = makeIdTableFromVector({{u, 7}, {u, 6}, {u, 5}, {u, 4}}); +auto wrongTable3 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {u, 4}}); + +TEST_P(ResultDefinednessTestS, + verifyCheckDefinednessDoesNotThrowIfColumnIsDefined) { if constexpr (!ad_utility::areExpensiveChecksEnabled) { GTEST_SKIP_("Expensive checks are disabled, skipping test."); } - auto correctTable1 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); - auto correctTable2 = - makeIdTableFromVector({{0, Id::makeUndefined()}, {1, 6}, {2, 5}, {3, 4}}); - auto correctTable3 = - makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, Id::makeUndefined()}}); - auto correctTable4 = makeIdTableFromVector({{0, Id::makeUndefined()}, - {1, Id::makeUndefined()}, - {2, Id::makeUndefined()}, - {3, Id::makeUndefined()}}); - auto wrongTable1 = - makeIdTableFromVector({{Id::makeUndefined(), 7}, {1, 6}, {2, 5}, {3, 4}}); - auto wrongTable2 = makeIdTableFromVector({{Id::makeUndefined(), 7}, - {Id::makeUndefined(), 6}, - {Id::makeUndefined(), 5}, - {Id::makeUndefined(), 4}}); - auto wrongTable3 = - makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {Id::makeUndefined(), 4}}); VariableToColumnMap map{ {Variable{"?a"}, {0, ColumnIndexAndTypeInfo::AlwaysDefined}}, {Variable{"?b"}, {1, ColumnIndexAndTypeInfo::PossiblyUndefined}}}; { - Result result{correctTable1.clone(), {}, LocalVocab{}}; + Result result{GetParam()->clone(), {}, LocalVocab{}}; EXPECT_NO_THROW(result.checkDefinedness(map)); } - { - Result 
result{correctTable2.clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.checkDefinedness(map)); - } - { - Result result{correctTable3.clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.checkDefinedness(map)); - } - { - Result result{correctTable4.clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.checkDefinedness(map)); - } - { - Result result{wrongTable1.clone(), {}, LocalVocab{}}; - EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); - } - { - Result result{wrongTable2.clone(), {}, LocalVocab{}}; - EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); - } - { - Result result{wrongTable3.clone(), {}, LocalVocab{}}; - EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); - } - for (auto& generator : getAllSubSplits(correctTable1)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& generator : getAllSubSplits(correctTable2)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& generator : getAllSubSplits(correctTable3)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - for (auto& generator : getAllSubSplits(correctTable4)) { + for (auto& generator : getAllSubSplits(*GetParam())) { Result result{std::move(generator), {}, LocalVocab{}}; result.checkDefinedness(map); EXPECT_NO_THROW(consumeGenerator(result.idTables())); } - for (auto& generator : getAllSubSplits(wrongTable1)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); +} + +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultDefinednessTestS, + testing::Values(&correctTable1, &correctTable2, + &correctTable3, &correctTable4)); + +// 
_____________________________________________________________________________ +TEST_P(ResultDefinednessTestF, + verifyCheckDefinednessDoesThrowIfColumnIsNotDefinedWhenClaimingItIs) { + if constexpr (!ad_utility::areExpensiveChecksEnabled) { + GTEST_SKIP_("Expensive checks are disabled, skipping test."); } - for (auto& generator : getAllSubSplits(wrongTable2)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + VariableToColumnMap map{ + {Variable{"?a"}, {0, ColumnIndexAndTypeInfo::AlwaysDefined}}, + {Variable{"?b"}, {1, ColumnIndexAndTypeInfo::PossiblyUndefined}}}; + + { + Result result{GetParam()->clone(), {}, LocalVocab{}}; + EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); } - for (auto& generator : getAllSubSplits(wrongTable3)) { + for (auto& generator : getAllSubSplits(*GetParam())) { Result result{std::move(generator), {}, LocalVocab{}}; result.checkDefinedness(map); EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); } } + +INSTANTIATE_TEST_SUITE_P(FailureCases, ResultDefinednessTestF, + testing::Values(&wrongTable1, &wrongTable2, + &wrongTable3)); diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 2f3a89259c..54730505d9 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -462,28 +462,14 @@ TEST(IndexScan, computeResultCanBeConsumedLazily) { ASSERT_FALSE(result.isFullyMaterialized()); - std::vector resultValues; + IdTable resultTable{3, ad_utility::makeUnlimitedAllocator()}; for (IdTable& idTable : result.idTables()) { - for (IdTable::row_type row : idTable) { - resultValues.push_back(row); - } + resultTable.insertAtEnd(idTable); } - ASSERT_EQ(resultValues.size(), 3); - ASSERT_EQ(resultValues[0].numColumns(), 3); - ASSERT_EQ(resultValues[1].numColumns(), 3); - ASSERT_EQ(resultValues[2].numColumns(), 3); - - EXPECT_EQ(resultValues[0][2], x); - 
EXPECT_EQ(resultValues[0][0], p); - EXPECT_EQ(resultValues[0][1], s1); - EXPECT_EQ(resultValues[1][2], x); - EXPECT_EQ(resultValues[1][0], p); - EXPECT_EQ(resultValues[1][1], s2); - EXPECT_EQ(resultValues[2][2], x); - EXPECT_EQ(resultValues[2][0], p2); - EXPECT_EQ(resultValues[2][1], s1); + EXPECT_EQ(resultTable, + makeIdTableFromVector({{p, s1, x}, {p, s2, x}, {p2, s1, x}})); } // _____________________________________________________________________________ diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index f683c9a381..b3e8b473f8 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -85,11 +85,12 @@ class ShallowParentOperation : public Operation { } }; -class AlwaysFailLazyOperation : public Operation { +// Operation that will throw on `computeResult` for testing. +class AlwaysFailOperation : public Operation { std::vector getChildren() override { return {}; } - string getCacheKeyImpl() const override { return "AlwaysFailLazyOperation"; } + string getCacheKeyImpl() const override { AD_CONTRACT_CHECK(false); } string getDescriptor() const override { - return "AlwaysFailLazyOperationDescriptor"; + return "AlwaysFailOperationDescriptor"; } size_t getResultWidth() const override { return 0; } size_t getCostEstimate() override { return 0; } @@ -101,9 +102,12 @@ class AlwaysFailLazyOperation : public Operation { public: using Operation::Operation; - ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult(bool requestLaziness) override { + if (!requestLaziness) { + throw std::runtime_error{"AlwaysFailOperation"}; + } return {[]() -> cppcoro::generator { - throw std::runtime_error{"AlwaysFailLazyOperation"}; + throw std::runtime_error{"AlwaysFailOperation"}; // Required so that the exception only occurs within the generator co_return; }(), @@ -111,10 +115,12 @@ class AlwaysFailLazyOperation : public Operation { } }; +// Lazy operation that will yield a 
result with a custom generator you can +// provide via the constructor. class CustomGeneratorOperation : public Operation { cppcoro::generator generator_; std::vector getChildren() override { return {}; } - string getCacheKeyImpl() const override { return "CustomGeneratorOperation"; } + string getCacheKeyImpl() const override { AD_CONTRACT_CHECK(false); } string getDescriptor() const override { return "CustomGeneratorOperationDescriptor"; } @@ -130,7 +136,8 @@ class CustomGeneratorOperation : public Operation { CustomGeneratorOperation(QueryExecutionContext* context, cppcoro::generator generator) : Operation{context}, generator_{std::move(generator)} {} - ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override { + ProtoResult computeResult(bool requestLaziness) override { + AD_CONTRACT_CHECK(requestLaziness); return {std::move(generator_), resultSortedOn(), LocalVocab{}}; } }; From a471bcf0e1dc5f3579272f890095208065900c4d Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:27:32 +0200 Subject: [PATCH 133/133] More PR comments --- test/ConcurrentCacheTest.cpp | 25 ++-- test/ExportQueryExecutionTreesTest.cpp | 45 ++++--- test/OperationTest.cpp | 95 ++++++-------- test/ResultTest.cpp | 169 +++++++++++-------------- test/util/OperationTestHelpers.h | 4 +- 5 files changed, 149 insertions(+), 189 deletions(-) diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index 0478a6a6e2..a0e092a464 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -484,11 +484,14 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { SimpleConcurrentLruCache cache{}; - auto expectContainsSingleEntry = - [&](bool pinned, ad_utility::source_location l = - ad_utility::source_location::current()) { + auto expectContainsSingleElementAtKey0 = + [&](bool pinned, std::string expected, + ad_utility::source_location l = + ad_utility::source_location::current()) { 
using namespace ::testing; auto trace = generateLocationTrace(l); + auto value = cache.getIfContained(0); + EXPECT_THAT(value, hasValue(expected)); if (pinned) { EXPECT_NE(cache.pinnedSize(), 0_B); EXPECT_EQ(cache.nonPinnedSize(), 0_B); @@ -500,27 +503,19 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { cache.tryInsertIfNotPresent(false, 0, std::make_shared("abc")); - auto value = cache.getIfContained(0); - EXPECT_THAT(value, hasValue("abc")); - expectContainsSingleEntry(false); + expectContainsSingleElementAtKey0(false, "abc"); cache.tryInsertIfNotPresent(false, 0, std::make_shared("def")); - value = cache.getIfContained(0); - EXPECT_THAT(value, hasValue("abc")); - expectContainsSingleEntry(false); + expectContainsSingleElementAtKey0(false, "abc"); cache.tryInsertIfNotPresent(true, 0, std::make_shared("ghi")); - value = cache.getIfContained(0); - EXPECT_THAT(value, hasValue("abc"s)); - expectContainsSingleEntry(true); + expectContainsSingleElementAtKey0(true, "abc"); cache.clearAll(); cache.tryInsertIfNotPresent(true, 0, std::make_shared("jkl")); - value = cache.getIfContained(0); - EXPECT_THAT(value, hasValue("jkl"s)); - expectContainsSingleEntry(true); + expectContainsSingleElementAtKey0(true, "jkl"); } diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 0128a0f460..76f8ee9a79 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -212,6 +212,16 @@ static std::string makeXMLHeader( // The end of a SPARQL XML export. static const std::string xmlTrailer = "\n\n"; +// Helper function for easier testing of the `IdTable` generator. 
+std::vector convertToVector( + cppcoro::generator generator) { + std::vector result; + for (const IdTable& idTable : generator) { + result.push_back(idTable.clone()); + } + return result; +} + // Template is only required because inner class is not visible template std::vector convertToVector(cppcoro::generator generator) { @@ -1089,40 +1099,29 @@ INSTANTIATE_TEST_SUITE_P(StreamableMediaTypes, StreamableMediaTypesFixture, TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator) { auto idTable = makeIdTableFromVector({{42}, {1337}}); - Result result{std::move(idTable), {}, LocalVocab{}}; + Result result{idTable.clone(), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(*iterator, makeIdTableFromVector({{42}, {1337}})); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(idTable)))); } // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; + IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); + IdTable idTable2 = makeIdTableFromVector({{42}, {1337}}); + auto tableGenerator = [](IdTable idTableA, + IdTable idTableB) -> cppcoro::generator { + co_yield idTableA; - IdTable idTable2 = makeIdTableFromVector({{42}, {1337}}); - co_yield idTable2; - }(); + co_yield idTableB; + }(idTable1.clone(), idTable2.clone()); Result result{std::move(tableGenerator), {}, LocalVocab{}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); - auto iterator = generator.begin(); - ASSERT_NE(iterator, generator.end()); - ASSERT_EQ(*iterator, makeIdTableFromVector({{1}, {2}, {3}})); - - ++iterator; - ASSERT_NE(iterator, generator.end()); - 
ASSERT_EQ(*iterator, makeIdTableFromVector({{42}, {1337}})); - - ++iterator; - EXPECT_EQ(iterator, generator.end()); + EXPECT_THAT(convertToVector(std::move(generator)), + ElementsAre(Eq(std::cref(idTable1)), Eq(std::cref(idTable2)))); } // _____________________________________________________________________________ diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 820fcd3913..9cd5022f14 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -12,9 +12,11 @@ using namespace ad_utility::testing; using namespace ::testing; +using ad_utility::CacheStatus; using ad_utility::CancellationException; using ad_utility::CancellationHandle; using ad_utility::CancellationState; +using Status = RuntimeInformation::Status; namespace { // Helper function to perform actions at various stages of a generator @@ -31,6 +33,14 @@ auto expectAtEachStageOfGenerator( } EXPECT_EQ(index, functions.size()); } + +void expectRtiHasDimensions( + RuntimeInformation& rti, uint64_t cols, uint64_t rows, + ad_utility::source_location l = ad_utility::source_location::current()) { + auto locationTrace = generateLocationTrace(l); + EXPECT_EQ(rti.numCols_, cols); + EXPECT_EQ(rti.numRows_, rows); +} } // namespace // ________________________________________________ @@ -56,7 +66,7 @@ TEST(OperationTest, getResultOnlyCached) { // The second `true` means "only read the result if it was cached". // We have just cleared the cache, and so this should return `nullptr`. EXPECT_EQ(n.getResult(true, ComputationMode::ONLY_IF_CACHED), nullptr); - EXPECT_EQ(n.runtimeInfo().status_, RuntimeInformation::Status::notStarted); + EXPECT_EQ(n.runtimeInfo().status_, Status::notStarted); // Nothing has been stored in the cache by this call. 
EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 0); EXPECT_EQ(qec->getQueryTreeCache().numPinnedEntries(), 0); @@ -65,9 +75,8 @@ TEST(OperationTest, getResultOnlyCached) { NeutralElementOperation n2{qec}; auto result = n2.getResult(); EXPECT_NE(result, nullptr); - EXPECT_EQ(n2.runtimeInfo().status_, - RuntimeInformation::Status::fullyMaterialized); - EXPECT_EQ(n2.runtimeInfo().cacheStatus_, ad_utility::CacheStatus::computed); + EXPECT_EQ(n2.runtimeInfo().status_, Status::fullyMaterialized); + EXPECT_EQ(n2.runtimeInfo().cacheStatus_, CacheStatus::computed); EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 1); EXPECT_EQ(qec->getQueryTreeCache().numPinnedEntries(), 0); @@ -75,8 +84,7 @@ TEST(OperationTest, getResultOnlyCached) { // get exactly the same `shared_ptr` as with the previous call. NeutralElementOperation n3{qec}; EXPECT_EQ(n3.getResult(true, ComputationMode::ONLY_IF_CACHED), result); - EXPECT_EQ(n3.runtimeInfo().cacheStatus_, - ad_utility::CacheStatus::cachedNotPinned); + EXPECT_EQ(n3.runtimeInfo().cacheStatus_, CacheStatus::cachedNotPinned); // We can even use the `onlyReadFromCache` case to upgrade a non-pinned // cache-entry to a pinned cache entry @@ -87,8 +95,7 @@ TEST(OperationTest, getResultOnlyCached) { // The cache status is `cachedNotPinned` because we found the element cached // but not pinned (it does reflect the status BEFORE the operation). - EXPECT_EQ(n4.runtimeInfo().cacheStatus_, - ad_utility::CacheStatus::cachedNotPinned); + EXPECT_EQ(n4.runtimeInfo().cacheStatus_, CacheStatus::cachedNotPinned); EXPECT_EQ(qec->getQueryTreeCache().numNonPinnedEntries(), 0); EXPECT_EQ(qec->getQueryTreeCache().numPinnedEntries(), 1); @@ -96,8 +103,7 @@ TEST(OperationTest, getResultOnlyCached) { // result. 
qecCopy._pinResult = false; EXPECT_EQ(n4.getResult(true, ComputationMode::ONLY_IF_CACHED), result); - EXPECT_EQ(n4.runtimeInfo().cacheStatus_, - ad_utility::CacheStatus::cachedPinned); + EXPECT_EQ(n4.runtimeInfo().cacheStatus_, CacheStatus::cachedPinned); // Clear the (global) cache again to not possibly interfere with other unit // tests. @@ -342,33 +348,24 @@ TEST(Operation, verifyRuntimeInformationIsUpdatedForLazyOperations) { auto& rti = valuesForTesting.runtimeInfo(); - EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(rti.status_, Status::lazilyMaterialized); EXPECT_EQ(rti.totalTime_, 0ms); EXPECT_EQ(rti.originalTotalTime_, 0ms); EXPECT_EQ(rti.originalOperationTime_, 0ms); - auto& idTables = result.idTables(); - - auto iterator = idTables.begin(); - ASSERT_NE(iterator, idTables.end()); - - EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 1); - - ++iterator; - ASSERT_NE(iterator, idTables.end()); - - EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 2); - - ++iterator; - ASSERT_EQ(iterator, idTables.end()); - - EXPECT_EQ(rti.status_, RuntimeInformation::Status::lazilyMaterialized); - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 2); + expectAtEachStageOfGenerator( + std::move(result.idTables()), + {[&]() { + EXPECT_EQ(rti.status_, Status::lazilyMaterialized); + expectRtiHasDimensions(rti, 2, 1); + }, + [&]() { + EXPECT_EQ(rti.status_, Status::lazilyMaterialized); + expectRtiHasDimensions(rti, 2, 2); + }}); + + EXPECT_EQ(rti.status_, Status::lazilyMaterialized); + expectRtiHasDimensions(rti, 2, 2); } // _____________________________________________________________________________ @@ -386,13 +383,11 @@ TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException) { auto result = operation.runComputation(timer, ComputationMode::LAZY_IF_SUPPORTED); - 
EXPECT_EQ(operation.runtimeInfo().status_, - RuntimeInformation::Status::lazilyMaterialized); + EXPECT_EQ(operation.runtimeInfo().status_, Status::lazilyMaterialized); EXPECT_THROW(result.idTables().begin(), std::runtime_error); - EXPECT_EQ(operation.runtimeInfo().status_, - RuntimeInformation::Status::failed); + EXPECT_EQ(operation.runtimeInfo().status_, Status::failed); EXPECT_TRUE(signaledUpdate); } @@ -495,29 +490,21 @@ TEST(Operation, verifyLimitIsProperlyAppliedAndUpdatesRuntimeInfoCorrectly) { auto& rti = valuesForTesting.runtimeInfo(); auto& childRti = *rti.children_.at(0); - EXPECT_EQ(rti.numCols_, 0); - EXPECT_EQ(rti.numRows_, 0); - EXPECT_EQ(childRti.numCols_, 0); - EXPECT_EQ(childRti.numRows_, 0); + expectRtiHasDimensions(rti, 0, 0); + expectRtiHasDimensions(childRti, 0, 0); expectAtEachStageOfGenerator(std::move(result.idTables()), {[&]() { - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 0); - EXPECT_EQ(childRti.numCols_, 2); - EXPECT_EQ(childRti.numRows_, 1); + expectRtiHasDimensions(rti, 2, 0); + expectRtiHasDimensions(childRti, 2, 1); }, [&]() { - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 1); - EXPECT_EQ(childRti.numCols_, 2); - EXPECT_EQ(childRti.numRows_, 3); + expectRtiHasDimensions(rti, 2, 1); + expectRtiHasDimensions(childRti, 2, 3); }}); - EXPECT_EQ(rti.numCols_, 2); - EXPECT_EQ(rti.numRows_, 1); - EXPECT_EQ(childRti.numCols_, 2); - EXPECT_EQ(childRti.numRows_, 3); + expectRtiHasDimensions(rti, 2, 1); + expectRtiHasDimensions(childRti, 2, 3); } // _____________________________________________________________________________ @@ -553,7 +540,7 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { EXPECT_EQ(newRti.totalTime_, oldRti.totalTime_); EXPECT_EQ(newRti.originalTotalTime_, oldRti.originalTotalTime_); EXPECT_EQ(newRti.originalOperationTime_, oldRti.originalOperationTime_); - EXPECT_EQ(newRti.status_, RuntimeInformation::Status::fullyMaterialized); + EXPECT_EQ(newRti.status_, Status::fullyMaterialized); const 
auto& aggregatedResult = aggregatedValue.value()._resultPointer->resultTable(); diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index 5d2c733e7a..5c8d1ce1a7 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -8,6 +8,9 @@ #include "util/IdTableHelpers.h" using namespace std::chrono_literals; +using testing::Combine; +using ::testing::HasSubstr; +using testing::Values; namespace { // Helper function to generate all possible splits of an IdTable in order to @@ -94,55 +97,49 @@ TEST(Result, verifyIdTablesThrowsWhenFullyMaterialized) { // _____________________________________________________________________________ using CIs = std::vector; -class ResultSortTestS : public testing::TestWithParam {}; -class ResultSortTestF : public testing::TestWithParam {}; +class ResultSortTest : public testing::TestWithParam> {}; -TEST_P(ResultSortTestS, verifyAssertSortOrderIsRespectedSucceedsWhenSorted) { +TEST_P(ResultSortTest, verifyAssertSortOrderIsRespectedSucceedsWhenSorted) { if constexpr (!ad_utility::areExpensiveChecksEnabled) { GTEST_SKIP_("Expensive checks are disabled, skipping test."); } auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), GetParam(), LocalVocab{}}; - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } - - EXPECT_NO_THROW((Result{std::move(idTable), GetParam(), LocalVocab{}})); -} - -INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultSortTestS, - testing::Values(CIs{}, CIs{0}, CIs{0, 1}, CIs{2, 0})); - -// _____________________________________________________________________________ -TEST_P(ResultSortTestF, verifyAssertSortOrderIsRespectedThrowsWhenNotSorted) { - if constexpr (!ad_utility::areExpensiveChecksEnabled) { - GTEST_SKIP_("Expensive checks are disabled, skipping test."); + Result result{std::move(generator), std::get<1>(GetParam()), LocalVocab{}}; + if (std::get<0>(GetParam())) { + 
EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } else { + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + consumeGenerator(result.idTables()), + HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); + } } - auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), GetParam(), LocalVocab{}}; + if (std::get<0>(GetParam())) { + EXPECT_NO_THROW( + (Result{std::move(idTable), std::get<1>(GetParam()), LocalVocab{}})); + } else { AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - consumeGenerator(result.idTables()), - ::testing::HasSubstr("compareRowsBySortColumns"), - ad_utility::Exception); + (Result{std::move(idTable), std::get<1>(GetParam()), LocalVocab{}}), + HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); } - - AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( - (Result{std::move(idTable), GetParam(), LocalVocab{}}), - ::testing::HasSubstr("compareRowsBySortColumns"), ad_utility::Exception); } -INSTANTIATE_TEST_SUITE_P(FailureCases, ResultSortTestF, - testing::Values(CIs{1}, CIs{1, 0}, CIs{2, 1})); +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultSortTest, + Combine(Values(true), + Values(CIs{}, CIs{0}, CIs{0, 1}, CIs{2, 0}))); + +INSTANTIATE_TEST_SUITE_P(FailureCases, ResultSortTest, + Combine(Values(false), + Values(CIs{1}, CIs{1, 0}, CIs{2, 1}))); // _____________________________________________________________________________ TEST(Result, verifyAnErrorIsThrownIfSortedByHasHigherIndicesThanTheTableHasColumns) { auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); using ad_utility::Exception; - auto matcher = ::testing::HasSubstr("colIndex < idTable.numColumns()"); + auto matcher = HasSubstr("colIndex < idTable.numColumns()"); AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( (Result{idTable.clone(), {3}, LocalVocab{}}), matcher, Exception); @@ -241,8 +238,7 @@ TEST(Result, verifyRunOnNewChunkCallsFinishOnError) { 
AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( consumeGenerator(result.idTables()), - ::testing::HasSubstr("verifyRunOnNewChunkCallsFinishOnError"), - std::runtime_error); + HasSubstr("verifyRunOnNewChunkCallsFinishOnError"), std::runtime_error); EXPECT_EQ(callCounterGenerator, 0); EXPECT_EQ(callCounterFinished, 1); @@ -406,51 +402,46 @@ TEST(Result, verifyApplyLimitOffsetHandlesZeroLimitCorrectly) { // _____________________________________________________________________________ using LIC = LimitOffsetClause; -class ResultLimitTestS : public testing::TestWithParam {}; -class ResultLimitTestF : public testing::TestWithParam {}; +class ResultLimitTest : public testing::TestWithParam> {}; -TEST_P(ResultLimitTestS, +TEST_P(ResultLimitTest, verifyAssertThatLimitWasRespectedDoesNotThrowIfLimitWasRespected) { auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); { Result result{idTable.clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.assertThatLimitWasRespected(GetParam())); + if (std::get<0>(GetParam())) { + EXPECT_NO_THROW( + result.assertThatLimitWasRespected(std::get<1>(GetParam()))); + } else { + EXPECT_THROW(result.assertThatLimitWasRespected(std::get<1>(GetParam())), + ad_utility::Exception); + } } for (auto& generator : getAllSubSplits(idTable)) { Result result{std::move(generator), {}, LocalVocab{}}; - result.assertThatLimitWasRespected(GetParam()); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } -} - -INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultLimitTestS, - testing::Values(LIC{}, LIC{4, 0}, LIC{4, 1337}, - LIC{42, 0}, LIC{42, 1337})); - -// _____________________________________________________________________________ -TEST_P(ResultLimitTestF, - verifyAssertThatLimitWasRespectedDoesThrowIfLimitWasNotRespected) { - auto idTable = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); - { - Result result{idTable.clone(), {}, LocalVocab{}}; - EXPECT_THROW(result.assertThatLimitWasRespected(GetParam()), - ad_utility::Exception); - } + 
result.assertThatLimitWasRespected(std::get<1>(GetParam())); - for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.assertThatLimitWasRespected(GetParam()); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + if (std::get<0>(GetParam())) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } else { + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } } } -INSTANTIATE_TEST_SUITE_P(FailureCases, ResultLimitTestF, - testing::Values(LIC{3, 0}, LIC{3, 1}, LIC{3, 2})); +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultLimitTest, + Combine(Values(true), + Values(LIC{}, LIC{4, 0}, LIC{4, 1337}, + LIC{42, 0}, LIC{42, 1337}))); + +INSTANTIATE_TEST_SUITE_P(FailureCases, ResultLimitTest, + Combine(Values(false), + Values(LIC{3, 0}, LIC{3, 1}, LIC{3, 2}))); // _____________________________________________________________________________ -class ResultDefinednessTestS : public testing::TestWithParam {}; -class ResultDefinednessTestF : public testing::TestWithParam {}; +class ResultDefinednessTest + : public testing::TestWithParam> {}; auto u = Id::makeUndefined(); auto correctTable1 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {3, 4}}); @@ -461,32 +452,7 @@ auto wrongTable1 = makeIdTableFromVector({{u, 7}, {1, 6}, {2, 5}, {3, 4}}); auto wrongTable2 = makeIdTableFromVector({{u, 7}, {u, 6}, {u, 5}, {u, 4}}); auto wrongTable3 = makeIdTableFromVector({{0, 7}, {1, 6}, {2, 5}, {u, 4}}); -TEST_P(ResultDefinednessTestS, - verifyCheckDefinednessDoesNotThrowIfColumnIsDefined) { - if constexpr (!ad_utility::areExpensiveChecksEnabled) { - GTEST_SKIP_("Expensive checks are disabled, skipping test."); - } - VariableToColumnMap map{ - {Variable{"?a"}, {0, ColumnIndexAndTypeInfo::AlwaysDefined}}, - {Variable{"?b"}, {1, ColumnIndexAndTypeInfo::PossiblyUndefined}}}; - - { - Result result{GetParam()->clone(), {}, LocalVocab{}}; - EXPECT_NO_THROW(result.checkDefinedness(map)); 
- } - for (auto& generator : getAllSubSplits(*GetParam())) { - Result result{std::move(generator), {}, LocalVocab{}}; - result.checkDefinedness(map); - EXPECT_NO_THROW(consumeGenerator(result.idTables())); - } -} - -INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultDefinednessTestS, - testing::Values(&correctTable1, &correctTable2, - &correctTable3, &correctTable4)); - -// _____________________________________________________________________________ -TEST_P(ResultDefinednessTestF, +TEST_P(ResultDefinednessTest, verifyCheckDefinednessDoesThrowIfColumnIsNotDefinedWhenClaimingItIs) { if constexpr (!ad_utility::areExpensiveChecksEnabled) { GTEST_SKIP_("Expensive checks are disabled, skipping test."); @@ -496,16 +462,29 @@ TEST_P(ResultDefinednessTestF, {Variable{"?b"}, {1, ColumnIndexAndTypeInfo::PossiblyUndefined}}}; { - Result result{GetParam()->clone(), {}, LocalVocab{}}; - EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); + Result result{std::get<1>(GetParam())->clone(), {}, LocalVocab{}}; + if (std::get<0>(GetParam())) { + EXPECT_NO_THROW(result.checkDefinedness(map)); + } else { + EXPECT_THROW(result.checkDefinedness(map), ad_utility::Exception); + } } - for (auto& generator : getAllSubSplits(*GetParam())) { + for (auto& generator : getAllSubSplits(*std::get<1>(GetParam()))) { Result result{std::move(generator), {}, LocalVocab{}}; result.checkDefinedness(map); - EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + if (std::get<0>(GetParam())) { + EXPECT_NO_THROW(consumeGenerator(result.idTables())); + } else { + EXPECT_THROW(consumeGenerator(result.idTables()), ad_utility::Exception); + } } } -INSTANTIATE_TEST_SUITE_P(FailureCases, ResultDefinednessTestF, - testing::Values(&wrongTable1, &wrongTable2, - &wrongTable3)); +INSTANTIATE_TEST_SUITE_P(SuccessCases, ResultDefinednessTest, + Combine(Values(true), + Values(&correctTable1, &correctTable2, + &correctTable3, &correctTable4))); + +INSTANTIATE_TEST_SUITE_P( + FailureCases, 
ResultDefinednessTest, + Combine(Values(false), Values(&wrongTable1, &wrongTable2, &wrongTable3))); diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index b3e8b473f8..480b76536a 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -88,7 +88,7 @@ class ShallowParentOperation : public Operation { // Operation that will throw on `computeResult` for testing. class AlwaysFailOperation : public Operation { std::vector getChildren() override { return {}; } - string getCacheKeyImpl() const override { AD_CONTRACT_CHECK(false); } + string getCacheKeyImpl() const override { AD_FAIL(); } string getDescriptor() const override { return "AlwaysFailOperationDescriptor"; } @@ -120,7 +120,7 @@ class AlwaysFailOperation : public Operation { class CustomGeneratorOperation : public Operation { cppcoro::generator generator_; std::vector getChildren() override { return {}; } - string getCacheKeyImpl() const override { AD_CONTRACT_CHECK(false); } + string getCacheKeyImpl() const override { AD_FAIL(); } string getDescriptor() const override { return "CustomGeneratorOperationDescriptor"; }