
Commit

more tests for randomize and critical bugfix: the 'offset+limit'th value would be inserted twice in the heap-sort algorithm
Taepper committed Feb 27, 2024
1 parent 0773999 commit 374a0be
Showing 5 changed files with 208 additions and 86 deletions.
26 changes: 13 additions & 13 deletions endToEndTests/test/queries/DetailsOrderByLimit.json
@@ -25,19 +25,6 @@
}
},
"expectedQueryResult": [
{
"age": 50,
"aminoAcidInsertions": null,
"country": "Switzerland",
"date": "2021-03-03",
"division": "Valais",
"gisaid_epi_isl": "EPI_ISL_1408062",
"nucleotideInsertions": "22204:CAGAA",
"pango_lineage": "B.1.1.7",
"qc_value": 0.97,
"region": "Europe",
"unsorted_date": "2020-11-24"
},
{
"age": 4,
"aminoAcidInsertions": "S:214:EPE",
@@ -50,6 +37,19 @@
"qc_value": 0.98,
"region": "Europe",
"unsorted_date": null
},
{
"age": 51,
"aminoAcidInsertions": null,
"country": "Switzerland",
"date": "2021-03-21",
"division": "Vaud",
"gisaid_epi_isl": "EPI_ISL_1597890",
"nucleotideInsertions": "22339:GCTGGT",
"pango_lineage": "B.1.1.7",
"qc_value": 0.96,
"region": null,
"unsorted_date": "2021-01-25"
}
]
}
36 changes: 19 additions & 17 deletions endToEndTests/test/queries/nOf_2of3_details.json
@@ -3,7 +3,9 @@
"query": {
"action": {
"type": "Details",
"orderByFields": ["gisaid_epi_isl"]
"randomize": {
"seed": 1232
}
},
"filterExpression": {
"type": "N-Of",
@@ -30,17 +32,17 @@
},
"expectedQueryResult": [
{
"age": 50,
"age": 58,
"aminoAcidInsertions": null,
"country": "Switzerland",
"date": "2020-11-13",
"division": "Solothurn",
"gisaid_epi_isl": "EPI_ISL_1005148",
"nucleotideInsertions": "25701:CCC",
"pango_lineage": "B.1.221",
"qc_value": 0.92,
"date": "2021-04-28",
"division": "Basel-Stadt",
"gisaid_epi_isl": "EPI_ISL_2019235",
"nucleotideInsertions": null,
"pango_lineage": "B.1.1.7",
"qc_value": 0.9,
"region": "Europe",
"unsorted_date": "2020-12-17"
"unsorted_date": "2021-01-22"
},
{
"age": 50,
@@ -69,17 +71,17 @@
"unsorted_date": "2021-02-10"
},
{
"age": 58,
"age": 50,
"aminoAcidInsertions": null,
"country": "Switzerland",
"date": "2021-04-28",
"division": "Basel-Stadt",
"gisaid_epi_isl": "EPI_ISL_2019235",
"nucleotideInsertions": null,
"pango_lineage": "B.1.1.7",
"qc_value": 0.9,
"date": "2020-11-13",
"division": "Solothurn",
"gisaid_epi_isl": "EPI_ISL_1005148",
"nucleotideInsertions": "25701:CCC",
"pango_lineage": "B.1.221",
"qc_value": 0.92,
"region": "Europe",
"unsorted_date": "2021-01-22"
"unsorted_date": "2020-12-17"
}
]
}
32 changes: 16 additions & 16 deletions src/silo/query_engine/actions/action.cpp
@@ -169,24 +169,24 @@ std::optional<uint32_t> parseOffset(const nlohmann::json& json) {
}

std::optional<uint32_t> parseRandomizeSeed(const nlohmann::json& json) {
if (json.contains("randomize")) {
if (json["randomize"].is_boolean()) {
if (json["randomize"].get<bool>()) {
const uint32_t time_based_seed =
std::chrono::system_clock::now().time_since_epoch().count();
return time_based_seed;
}
return std::nullopt;
if (!json.contains("randomize")) {
return std::nullopt;
}
if (json["randomize"].is_boolean()) {
if (json["randomize"].get<bool>()) {
const uint32_t time_based_seed =
std::chrono::system_clock::now().time_since_epoch().count();
return time_based_seed;
}
CHECK_SILO_QUERY(
json["randomize"].is_object() && json["randomize"].contains("seed") &&
json["randomize"]["seed"].is_number_unsigned(),
"If the action contains 'randomize', it must be either a boolean or an object "
"containing an unsigned 'seed'"
)
return json["randomize"]["seed"].get<uint32_t>();
return std::nullopt;
}
return std::nullopt;
CHECK_SILO_QUERY(
json["randomize"].is_object() && json["randomize"].contains("seed") &&
json["randomize"]["seed"].is_number_unsigned(),
"If the action contains 'randomize', it must be either a boolean or an object "
"containing an unsigned 'seed'"
)
return json["randomize"]["seed"].get<uint32_t>();
}

// NOLINTNEXTLINE(readability-identifier-naming)
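The refactoring above flattens the nested ifs into an early return without changing behaviour: `randomize` may be omitted, given as a boolean, or given as an object carrying an unsigned `seed`. A minimal sketch of the three accepted forms, assuming `parseRandomizeSeed` from the diff above is declared and visible at the call site (illustrative only, not part of the commit):

#include <cassert>
#include <cstdint>
#include <optional>

#include <nlohmann/json.hpp>

// Declaration as shown in the diff above; in SILO this helper lives in
// action.cpp, so linking against it here is purely for illustration.
std::optional<uint32_t> parseRandomizeSeed(const nlohmann::json& json);

int main() {
   using nlohmann::json;

   // Object form: a fixed seed yields a reproducible shuffle of the result rows.
   assert(parseRandomizeSeed(json::parse(R"({"randomize": {"seed": 1231}})")) == 1231U);

   // Boolean true: a time-based seed is generated, so some seed is returned.
   assert(parseRandomizeSeed(json::parse(R"({"randomize": true})")).has_value());

   // Boolean false or a missing field: no seed, hence no shuffling.
   assert(!parseRandomizeSeed(json::parse(R"({"randomize": false})")).has_value());
   assert(!parseRandomizeSeed(json::parse(R"({})")).has_value());
   return 0;
}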
1 change: 1 addition & 0 deletions src/silo/query_engine/actions/details.cpp
@@ -131,6 +131,7 @@ std::vector<actions::Tuple> produceSortedTuplesWithLimit(
my_tuples.back() = current_tuple;
std::push_heap(my_tuples.begin(), my_tuples.end(), tuple_comparator);
}
iterator++;
for (; iterator != end; iterator++) {
tuple_factory.overwrite(current_tuple, *iterator);
if (tuple_comparator(current_tuple, my_tuples.front())) {
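The single added line above is the critical bugfix named in the commit message. produceSortedTuplesWithLimit keeps the offset+limit smallest tuples in a max-heap; once the heap is full, the iterator still points at the tuple that was just pushed, so without advancing it first that tuple was compared and inserted a second time in the main loop, duplicating the 'offset+limit'th value. A self-contained sketch of the same heap pattern over plain ints, where k plays the role of offset+limit (simplified names, not the actual SILO implementation):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

// Keep the k smallest values of `values` using a max-heap, mirroring the
// two-phase structure of produceSortedTuplesWithLimit.
std::vector<int> smallestK(const std::vector<int>& values, std::size_t k) {
   std::vector<int> heap;  // max-heap of the k smallest values seen so far
   if (k == 0 || values.empty()) {
      return heap;
   }
   auto it = values.begin();
   const auto end = values.end();

   // Phase 1: push elements until the heap holds k values. The loop exits
   // right after pushing, so `it` still points at the element just pushed.
   while (true) {
      heap.push_back(*it);
      std::push_heap(heap.begin(), heap.end());
      if (heap.size() == k || std::next(it) == end) {
         break;
      }
      ++it;
   }

   // The commit's fix corresponds to this step: advance past the element that
   // was already pushed. Without it, the k-th element is considered again in
   // phase 2 and can end up in the result twice.
   ++it;

   // Phase 2: whenever a smaller element appears, replace the current maximum.
   for (; it != end; ++it) {
      if (*it < heap.front()) {
         std::pop_heap(heap.begin(), heap.end());
         heap.back() = *it;
         std::push_heap(heap.begin(), heap.end());
      }
   }
   std::sort_heap(heap.begin(), heap.end());
   return heap;
}

int main() {
   const std::vector<int> values{7, 3, 9, 1, 5, 8, 2};
   for (const int value : smallestK(values, 3)) {
      std::cout << value << ' ';  // prints: 1 2 3
   }
   std::cout << '\n';
   return 0;
}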
199 changes: 159 additions & 40 deletions src/silo/test/randomize.test.cpp
@@ -4,34 +4,49 @@

#include "silo/test/query_fixture.test.h"

using nlohmann::json;

using silo::ReferenceGenomes;
using silo::config::DatabaseConfig;
using silo::config::ValueType;
using silo::test::QueryTestData;
using silo::test::QueryTestScenario;

const std::vector<nlohmann::json> DATA = {
{{"metadata", {{"key", "id1"}, {"col", "A"}}},
{"alignedNucleotideSequences", {{"segment1", nullptr}}},
{"unalignedNucleotideSequences", {{"segment1", nullptr}}},
{"alignedAminoAcidSequences", {{"gene1", nullptr}}}},
{{"metadata", {{"key", "id2"}, {"col", "A"}}},
{"alignedNucleotideSequences", {{"segment1", nullptr}}},
{"unalignedNucleotideSequences", {{"segment1", nullptr}}},
{"alignedAminoAcidSequences", {{"gene1", nullptr}}}},
{{"metadata", {{"key", "id3"}, {"col", "A"}}},
{"alignedNucleotideSequences", {{"segment1", nullptr}}},
{"unalignedNucleotideSequences", {{"segment1", nullptr}}},
{"alignedAminoAcidSequences", {{"gene1", nullptr}}}},
{{"metadata", {{"key", "id4"}, {"col", "A"}}},
{"alignedNucleotideSequences", {{"segment1", nullptr}}},
{"unalignedNucleotideSequences", {{"segment1", nullptr}}},
{"alignedAminoAcidSequences", {{"gene1", nullptr}}}},
{{"metadata", {{"key", "id5"}, {"col", "A"}}},
{"alignedNucleotideSequences", {{"segment1", nullptr}}},
{"unalignedNucleotideSequences", {{"segment1", nullptr}}},
{"alignedAminoAcidSequences", {{"gene1", nullptr}}}}
};
const auto DATA_JSON = R"([
{
"metadata": {"key": "id1", "col": "A"},
"alignedNucleotideSequences": {"segment1": null},
"unalignedNucleotideSequences": {"segment1": null},
"alignedAminoAcidSequences": {"gene1": null}
},
{
"metadata": {"key": "id2", "col": "B"},
"alignedNucleotideSequences": {"segment1": null},
"unalignedNucleotideSequences": {"segment1": null},
"alignedAminoAcidSequences": {"gene1": null}
},
{
"metadata": {"key": "id3", "col": "A"},
"alignedNucleotideSequences": {"segment1": null},
"unalignedNucleotideSequences": {"segment1": null},
"alignedAminoAcidSequences": {"gene1": null}
},
{
"metadata": {"key": "id4", "col": "B"},
"alignedNucleotideSequences": {"segment1": null},
"unalignedNucleotideSequences": {"segment1": null},
"alignedAminoAcidSequences": {"gene1": null}
},
{
"metadata": {"key": "id5", "col": "A"},
"alignedNucleotideSequences": {"segment1": null},
"unalignedNucleotideSequences": {"segment1": null},
"alignedAminoAcidSequences": {"gene1": null}
}
])";

// Parse the JSON array literal into a vector of per-sample json objects
const std::vector<json> DATA = json::parse(DATA_JSON);

const auto DATABASE_CONFIG = DatabaseConfig{
"segment1",
@@ -47,39 +62,143 @@ const QueryTestData TEST_DATA{DATA, DATABASE_CONFIG, REFERENCE_GENOMES};

const QueryTestScenario RANDOMIZE_SEED = {
"seed1231ProvidedShouldShuffleResults",
{{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", {{"seed", 1231}}}}},
{"filterExpression", {{"type", "True"}}}},
{{{"key", "id4"}}, {{"key", "id1"}}, {{"key", "id5"}}, {{"key", "id2"}}, {{"key", "id3"}}}
json::parse(
R"({"action": {"type": "Details", "fields": ["key"], "randomize": {"seed": 1231}},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id4"},
{"key": "id1"},
{"key": "id5"},
{"key": "id2"},
{"key": "id3"}])"
)
};

const QueryTestScenario RANDOMIZE_SEED_DIFFERENT = {
"seed12312ProvidedShouldShuffleResultsDifferently",
{{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", {{"seed", 12312}}}}},
{"filterExpression", {{"type", "True"}}}},
{{{"key", "id1"}}, {{"key", "id4"}}, {{"key", "id3"}}, {{"key", "id2"}}, {{"key", "id5"}}}
json::parse(
R"({"action": {"type": "Details", "fields": ["key"], "randomize": {"seed": 12312}},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id1"},
{"key": "id4"},
{"key": "id3"},
{"key": "id2"},
{"key": "id5"}])"
)
};

const QueryTestScenario EXPLICIT_DO_NOT_RANDOMIZE = {
"explicitlyDoNotRandomize",
{{"action", {{"type", "Details"}, {"fields", {"key"}}, {"randomize", false}}},
{"filterExpression", {{"type", "True"}}}},
{{{"key", "id1"}}, {{"key", "id2"}}, {{"key", "id3"}}, {{"key", "id4"}}, {{"key", "id5"}}}
json::parse(
R"({"action": {"type": "Details", "fields": ["key"], "randomize": false},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id1"},
{"key": "id2"},
{"key": "id3"},
{"key": "id4"},
{"key": "id5"}])"
)
};

const QueryTestScenario AGGREGATE = {
"aggregateRandomize",
{{"action",
{{"type", "Aggregated"}, {"groupByFields", {"key"}}, {"randomize", {{"seed", 12321}}}}},
{"filterExpression", {{"type", "True"}}}},
{{{"count", 1}, {"key", "id3"}},
{{"count", 1}, {"key", "id1"}},
{{"count", 1}, {"key", "id4"}},
{{"count", 1}, {"key", "id5"}},
{{"count", 1}, {"key", "id2"}}}
json::parse(
R"({"action": {"type": "Aggregated", "groupByFields": ["key"], "randomize": {"seed": 12321}},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"count": 1, "key": "id3"},
{"count": 1, "key": "id1"},
{"count": 1, "key": "id4"},
{"count": 1, "key": "id5"},
{"count": 1, "key": "id2"}])"
)
};

const QueryTestScenario ORDER_BY_PRECEDENCE = {
"orderByTakePrecedenceOverRandomize",
json::parse(
R"({"action": {"type": "Details", "fields": ["key", "col"], "randomize": {"seed": 123212}, "orderByFields": ["col"]},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id5", "col": "A"},
{"key": "id1", "col": "A"},
{"key": "id3", "col": "A"},
{"key": "id2", "col": "B"},
{"key": "id4", "col": "B"}])"
)
};

const QueryTestScenario ORDER_BY_AGGREGATE_RANDOMIZE = {
"orderingByAggregatedCount",
json::parse(
R"({"action": {"type": "Aggregated", "groupByFields": ["col"], "randomize": true, "orderByFields": ["count"]},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"count": 2, "col": "B"},
{"count": 3, "col": "A"}])"
)
};

const QueryTestScenario LIMIT_2_RANDOMIZE = {
"detailsWithLimit2AndOffsetRandomized",
json::parse(
R"({"action": {"type": "Details", "fields": ["key", "col"],
"orderByFields": ["col", "key"], "limit": 2, "offset": 2},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id5", "col": "A"},
{"key": "id2", "col": "B"}])"
)
};

const QueryTestScenario LIMIT_3_RANDOMIZE = {
"detailsWithLimit3AndOffsetRandomized",
json::parse(
R"({"action": {"type": "Details", "fields": ["key", "col"],
"orderByFields": ["col", "key"], "limit": 3, "offset": 2},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"key": "id5", "col": "A"},
{"key": "id2", "col": "B"},
{"key": "id4", "col": "B"}])"
)
};

const QueryTestScenario AGGREGATE_LIMIT_RANDOMIZE = {
"aggregateWithLimitAndOffsetRandomized",
json::parse(
R"({"action": {"type": "Aggregated", "groupByFields": ["key", "col"], "randomize": {"seed": 123},
"orderByFields": ["col"], "limit": 2, "offset": 1},
"filterExpression": {"type": "True"}})"
),
json::parse(
R"([{"count": 1, "key": "id1", "col": "A"},
{"count": 1, "key": "id3", "col": "A"}])"
)
};

QUERY_TEST(
RandomizeTest,
TEST_DATA,
::testing::Values(RANDOMIZE_SEED, RANDOMIZE_SEED_DIFFERENT, EXPLICIT_DO_NOT_RANDOMIZE, AGGREGATE)
::testing::Values(
RANDOMIZE_SEED,
RANDOMIZE_SEED_DIFFERENT,
EXPLICIT_DO_NOT_RANDOMIZE,
AGGREGATE,
ORDER_BY_PRECEDENCE,
ORDER_BY_AGGREGATE_RANDOMIZE,
LIMIT_2_RANDOMIZE,
LIMIT_3_RANDOMIZE,
AGGREGATE_LIMIT_RANDOMIZE
)
);
