diff --git a/endToEndTests/test/queries/DetailsOrderByLimit.json b/endToEndTests/test/queries/DetailsOrderByLimit.json index d94bcad34..534573278 100644 --- a/endToEndTests/test/queries/DetailsOrderByLimit.json +++ b/endToEndTests/test/queries/DetailsOrderByLimit.json @@ -25,19 +25,6 @@ } }, "expectedQueryResult": [ - { - "age": 50, - "aminoAcidInsertions": null, - "country": "Switzerland", - "date": "2021-03-03", - "division": "Valais", - "gisaid_epi_isl": "EPI_ISL_1408062", - "nucleotideInsertions": "22204:CAGAA", - "pango_lineage": "B.1.1.7", - "qc_value": 0.97, - "region": "Europe", - "unsorted_date": "2020-11-24" - }, { "age": 4, "aminoAcidInsertions": "S:214:EPE", @@ -50,6 +37,19 @@ "qc_value": 0.98, "region": "Europe", "unsorted_date": null + }, + { + "age": 51, + "aminoAcidInsertions": null, + "country": "Switzerland", + "date": "2021-03-21", + "division": "Vaud", + "gisaid_epi_isl": "EPI_ISL_1597890", + "nucleotideInsertions": "22339:GCTGGT", + "pango_lineage": "B.1.1.7", + "qc_value": 0.96, + "region": null, + "unsorted_date": "2021-01-25" } ] } diff --git a/endToEndTests/test/queries/nOf_2of3_details.json b/endToEndTests/test/queries/nOf_2of3_details.json index 413ca4a38..4bb18ddec 100644 --- a/endToEndTests/test/queries/nOf_2of3_details.json +++ b/endToEndTests/test/queries/nOf_2of3_details.json @@ -3,7 +3,9 @@ "query": { "action": { "type": "Details", - "orderByFields": ["gisaid_epi_isl"] + "randomize": { + "seed": 1232 + } }, "filterExpression": { "type": "N-Of", @@ -30,17 +32,17 @@ }, "expectedQueryResult": [ { - "age": 50, + "age": 58, "aminoAcidInsertions": null, "country": "Switzerland", - "date": "2020-11-13", - "division": "Solothurn", - "gisaid_epi_isl": "EPI_ISL_1005148", - "nucleotideInsertions": "25701:CCC", - "pango_lineage": "B.1.221", - "qc_value": 0.92, + "date": "2021-04-28", + "division": "Basel-Stadt", + "gisaid_epi_isl": "EPI_ISL_2019235", + "nucleotideInsertions": null, + "pango_lineage": "B.1.1.7", + "qc_value": 0.9, "region": "Europe", - "unsorted_date": "2020-12-17" + "unsorted_date": "2021-01-22" }, { "age": 50, @@ -69,17 +71,17 @@ "unsorted_date": "2021-02-10" }, { - "age": 58, + "age": 50, "aminoAcidInsertions": null, "country": "Switzerland", - "date": "2021-04-28", - "division": "Basel-Stadt", - "gisaid_epi_isl": "EPI_ISL_2019235", - "nucleotideInsertions": null, - "pango_lineage": "B.1.1.7", - "qc_value": 0.9, + "date": "2020-11-13", + "division": "Solothurn", + "gisaid_epi_isl": "EPI_ISL_1005148", + "nucleotideInsertions": "25701:CCC", + "pango_lineage": "B.1.221", + "qc_value": 0.92, "region": "Europe", - "unsorted_date": "2021-01-22" + "unsorted_date": "2020-12-17" } ] } diff --git a/src/silo/query_engine/actions/action.cpp b/src/silo/query_engine/actions/action.cpp index e9b7c5439..413c6d0ee 100644 --- a/src/silo/query_engine/actions/action.cpp +++ b/src/silo/query_engine/actions/action.cpp @@ -169,24 +169,24 @@ std::optional parseOffset(const nlohmann::json& json) { } std::optional parseRandomizeSeed(const nlohmann::json& json) { - if (json.contains("randomize")) { - if (json["randomize"].is_boolean()) { - if (json["randomize"].get()) { - const uint32_t time_based_seed = - std::chrono::system_clock::now().time_since_epoch().count(); - return time_based_seed; - } - return std::nullopt; + if (!json.contains("randomize")) { + return std::nullopt; + } + if (json["randomize"].is_boolean()) { + if (json["randomize"].get()) { + const uint32_t time_based_seed = + std::chrono::system_clock::now().time_since_epoch().count(); + return time_based_seed; } - CHECK_SILO_QUERY( - json["randomize"].is_object() && json["randomize"].contains("seed") && - json["randomize"]["seed"].is_number_unsigned(), - "If the action contains 'randomize', it must be either a boolean or an object " - "containing an unsigned 'seed'" - ) - return json["randomize"]["seed"].get(); + return std::nullopt; } - return std::nullopt; + CHECK_SILO_QUERY( + json["randomize"].is_object() && json["randomize"].contains("seed") && + json["randomize"]["seed"].is_number_unsigned(), + "If the action contains 'randomize', it must be either a boolean or an object " + "containing an unsigned 'seed'" + ) + return json["randomize"]["seed"].get(); } // NOLINTNEXTLINE(readability-identifier-naming) diff --git a/src/silo/query_engine/actions/details.cpp b/src/silo/query_engine/actions/details.cpp index b2c7f4159..a352b414d 100644 --- a/src/silo/query_engine/actions/details.cpp +++ b/src/silo/query_engine/actions/details.cpp @@ -131,6 +131,7 @@ std::vector produceSortedTuplesWithLimit( my_tuples.back() = current_tuple; std::push_heap(my_tuples.begin(), my_tuples.end(), tuple_comparator); } + iterator++; for (; iterator != end; iterator++) { tuple_factory.overwrite(current_tuple, *iterator); if (tuple_comparator(current_tuple, my_tuples.front())) { diff --git a/src/silo/test/randomize.test.cpp b/src/silo/test/randomize.test.cpp index 60e1c3f4f..3c393ed9b 100644 --- a/src/silo/test/randomize.test.cpp +++ b/src/silo/test/randomize.test.cpp @@ -4,34 +4,49 @@ #include "silo/test/query_fixture.test.h" +using nlohmann::json; + using silo::ReferenceGenomes; using silo::config::DatabaseConfig; using silo::config::ValueType; using silo::test::QueryTestData; using silo::test::QueryTestScenario; -const std::vector DATA = { - {{"metadata", {{"key", "id1"}, {"col", "A"}}}, - {"alignedNucleotideSequences", {{"segment1", nullptr}}}, - {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, - {"alignedAminoAcidSequences", {{"gene1", nullptr}}}}, - {{"metadata", {{"key", "id2"}, {"col", "A"}}}, - {"alignedNucleotideSequences", {{"segment1", nullptr}}}, - {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, - {"alignedAminoAcidSequences", {{"gene1", nullptr}}}}, - {{"metadata", {{"key", "id3"}, {"col", "A"}}}, - {"alignedNucleotideSequences", {{"segment1", nullptr}}}, - {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, - {"alignedAminoAcidSequences", {{"gene1", nullptr}}}}, - {{"metadata", {{"key", "id4"}, {"col", "A"}}}, - {"alignedNucleotideSequences", {{"segment1", nullptr}}}, - {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, - {"alignedAminoAcidSequences", {{"gene1", nullptr}}}}, - {{"metadata", {{"key", "id5"}, {"col", "A"}}}, - {"alignedNucleotideSequences", {{"segment1", nullptr}}}, - {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, - {"alignedAminoAcidSequences", {{"gene1", nullptr}}}} -}; +const auto DATA_JSON = R"([ + { + "metadata": {"key": "id1", "col": "A"}, + "alignedNucleotideSequences": {"segment1": null}, + "unalignedNucleotideSequences": {"segment1": null}, + "alignedAminoAcidSequences": {"gene1": null} + }, + { + "metadata": {"key": "id2", "col": "B"}, + "alignedNucleotideSequences": {"segment1": null}, + "unalignedNucleotideSequences": {"segment1": null}, + "alignedAminoAcidSequences": {"gene1": null} + }, + { + "metadata": {"key": "id3", "col": "A"}, + "alignedNucleotideSequences": {"segment1": null}, + "unalignedNucleotideSequences": {"segment1": null}, + "alignedAminoAcidSequences": {"gene1": null} + }, + { + "metadata": {"key": "id4", "col": "B"}, + "alignedNucleotideSequences": {"segment1": null}, + "unalignedNucleotideSequences": {"segment1": null}, + "alignedAminoAcidSequences": {"gene1": null} + }, + { + "metadata": {"key": "id5", "col": "A"}, + "alignedNucleotideSequences": {"segment1": null}, + "unalignedNucleotideSequences": {"segment1": null}, + "alignedAminoAcidSequences": {"gene1": null} + } +])"; + +// Parsing the JSON string to a json object +const std::vector DATA = json::parse(DATA_JSON); const auto DATABASE_CONFIG = DatabaseConfig{ "segment1", @@ -47,39 +62,143 @@ const QueryTestData TEST_DATA{DATA, DATABASE_CONFIG, REFERENCE_GENOMES}; const QueryTestScenario RANDOMIZE_SEED = { "seed1231ProvidedShouldShuffleResults", - {{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", {{"seed", 1231}}}}}, - {"filterExpression", {{"type", "True"}}}}, - {{{"key", "id4"}}, {{"key", "id1"}}, {{"key", "id5"}}, {{"key", "id2"}}, {{"key", "id3"}}} + json::parse( + R"({"action": {"type": "Details", "fields": ["key"], "randomize": {"seed": 1231}}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id4"}, + {"key": "id1"}, + {"key": "id5"}, + {"key": "id2"}, + {"key": "id3"}])" + ) }; const QueryTestScenario RANDOMIZE_SEED_DIFFERENT = { "seed12312ProvidedShouldShuffleResultsDifferently", - {{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", {{"seed", 12312}}}}}, - {"filterExpression", {{"type", "True"}}}}, - {{{"key", "id1"}}, {{"key", "id4"}}, {{"key", "id3"}}, {{"key", "id2"}}, {{"key", "id5"}}} + json::parse( + R"({"action": {"type": "Details", "fields": ["key"], "randomize": {"seed": 12312}}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id1"}, + {"key": "id4"}, + {"key": "id3"}, + {"key": "id2"}, + {"key": "id5"}])" + ) }; const QueryTestScenario EXPLICIT_DO_NOT_RANDOMIZE = { "explicitlyDoNotRandomize", - {{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", false}}}, - {"filterExpression", {{"type", "True"}}}}, - {{{"key", "id1"}}, {{"key", "id2"}}, {{"key", "id3"}}, {{"key", "id4"}}, {{"key", "id5"}}} + json::parse( + R"({"action": {"type": "Details", "fields": ["key"], "randomize": false}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id1"}, + {"key": "id2"}, + {"key": "id3"}, + {"key": "id4"}, + {"key": "id5"}])" + ) }; const QueryTestScenario AGGREGATE = { "aggregateRandomize", - {{"action", - {{"type", "Aggregated"}, {"groupByFields", {"key"}}, {"randomize", {{"seed", 12321}}}}}, - {"filterExpression", {{"type", "True"}}}}, - {{{"count", 1}, {"key", "id3"}}, - {{"count", 1}, {"key", "id1"}}, - {{"count", 1}, {"key", "id4"}}, - {{"count", 1}, {"key", "id5"}}, - {{"count", 1}, {"key", "id2"}}} + json::parse( + R"({"action": {"type": "Aggregated", "groupByFields": ["key"], "randomize": {"seed": 12321}}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"count": 1, "key": "id3"}, + {"count": 1, "key": "id1"}, + {"count": 1, "key": "id4"}, + {"count": 1, "key": "id5"}, + {"count": 1, "key": "id2"}])" + ) +}; + +const QueryTestScenario ORDER_BY_PRECEDENCE = { + "orderByTakePrecedenceOverRandomize", + json::parse( + R"({"action": {"type": "Details", "fields": ["key", "col"], "randomize": {"seed": 123212}, "orderByFields": ["col"]}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id5", "col": "A"}, + {"key": "id1", "col": "A"}, + {"key": "id3", "col": "A"}, + {"key": "id2", "col": "B"}, + {"key": "id4", "col": "B"}])" + ) +}; + +const QueryTestScenario ORDER_BY_AGGREGATE_RANDOMIZE = { + "orderingByAggregatedCount", + json::parse( + R"({"action": {"type": "Aggregated", "groupByFields": ["col"], "randomize": true, "orderByFields": ["count"]}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"count": 2, "col": "B"}, + {"count": 3, "col": "A"}])" + ) +}; + +const QueryTestScenario LIMIT_2_RANDOMIZE = { + "detailsWithLimit2AndOffsetRandomized", + json::parse( + R"({"action": {"type": "Details", "fields": ["key", "col"], + "orderByFields": ["col", "key"], "limit": 2, "offset": 2}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id5", "col": "A"}, + {"key": "id2", "col": "B"}])" + ) +}; + +const QueryTestScenario LIMIT_3_RANDOMIZE = { + "detailsWithLimit3AndOffsetRandomized", + json::parse( + R"({"action": {"type": "Details", "fields": ["key", "col"], + "orderByFields": ["col", "key"], "limit": 3, "offset": 2}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"key": "id5", "col": "A"}, + {"key": "id2", "col": "B"}, + {"key": "id4", "col": "B"}])" + ) +}; + +const QueryTestScenario AGGREGATE_LIMIT_RANDOMIZE = { + "aggregateWithLimitAndOffsetRandomized", + json::parse( + R"({"action": {"type": "Aggregated", "groupByFields": ["key", "col"], "randomize": {"seed": 123}, + "orderByFields": ["col"], "limit": 2, "offset": 1}, + "filterExpression": {"type": "True"}})" + ), + json::parse( + R"([{"count": 1, "key": "id1", "col": "A"}, + {"count": 1, "key": "id3", "col": "A"}])" + ) }; QUERY_TEST( RandomizeTest, TEST_DATA, - ::testing::Values(RANDOMIZE_SEED, RANDOMIZE_SEED_DIFFERENT, EXPLICIT_DO_NOT_RANDOMIZE, AGGREGATE) + ::testing::Values( + RANDOMIZE_SEED, + RANDOMIZE_SEED_DIFFERENT, + EXPLICIT_DO_NOT_RANDOMIZE, + AGGREGATE, + ORDER_BY_PRECEDENCE, + ORDER_BY_AGGREGATE_RANDOMIZE, + LIMIT_2_RANDOMIZE, + LIMIT_3_RANDOMIZE, + AGGREGATE_LIMIT_RANDOMIZE + ) );