diff --git a/endToEndTests/test/queries/GroupByDivision.json b/endToEndTests/test/queries/GroupByDivision.json index 754bf2f3d..cb9807b5e 100644 --- a/endToEndTests/test/queries/GroupByDivision.json +++ b/endToEndTests/test/queries/GroupByDivision.json @@ -11,6 +11,10 @@ } }, "expectedQueryResult": [ + { + "count": 2, + "division": null + }, { "count": 6, "division": "Aargau" @@ -24,7 +28,7 @@ "division": "Basel-Stadt" }, { - "count": 10, + "count": 9, "division": "Bern" }, { @@ -56,7 +60,7 @@ "division": "Schwyz" }, { - "count": 16, + "count": 15, "division": "Solothurn" }, { diff --git a/endToEndTests/test/queries/GroupByLineage.json b/endToEndTests/test/queries/GroupByLineage.json index 806650c7a..cfc281041 100644 --- a/endToEndTests/test/queries/GroupByLineage.json +++ b/endToEndTests/test/queries/GroupByLineage.json @@ -11,6 +11,10 @@ } }, "expectedQueryResult": [ + { + "count": 1, + "pango_lineage": null + }, { "count": 3, "pango_lineage": "B.1" @@ -95,10 +99,6 @@ "count": 1, "pango_lineage": "B.1.617.2.122" }, - { - "count": 1, - "pango_lineage": "B.1.617.2.42" - }, { "count": 4, "pango_lineage": "B.1.617.2.43" diff --git a/endToEndTests/test/queries/OrderByAge.json b/endToEndTests/test/queries/OrderByAge.json new file mode 100644 index 000000000..c47c691cf --- /dev/null +++ b/endToEndTests/test/queries/OrderByAge.json @@ -0,0 +1,71 @@ +{ + "testCaseName": "Order By age column ascending", + "query": { + "action": { + "type": "Aggregated", + "groupByFields": ["age"], + "orderByFields": ["age asc"] + }, + "filterExpression": { + "type": "True" + } + }, + "expectedQueryResult": [ + { + "age": null, + "count": 2 + }, + { + "age": 4, + "count": 2 + }, + { + "age": 5, + "count": 1 + }, + { + "age": 6, + "count": 1 + }, + { + "age": 50, + "count": 17 + }, + { + "age": 51, + "count": 7 + }, + { + "age": 52, + "count": 8 + }, + { + "age": 53, + "count": 8 + }, + { + "age": 54, + "count": 9 + }, + { + "age": 55, + "count": 8 + }, + { + "age": 56, + "count": 9 + }, + { 
+ "age": 57, + "count": 10 + }, + { + "age": 58, + "count": 9 + }, + { + "age": 59, + "count": 9 + } + ] +} diff --git a/endToEndTests/test/queries/OrderByFloat.json b/endToEndTests/test/queries/OrderByFloat.json index 700e8d8a8..a2d5eccf3 100644 --- a/endToEndTests/test/queries/OrderByFloat.json +++ b/endToEndTests/test/queries/OrderByFloat.json @@ -1,10 +1,10 @@ { - "testCaseName": "Order By QC float column", + "testCaseName": "Order By QC float column ascending", "query": { "action": { "type": "Aggregated", "groupByFields": ["qc_value"], - "orderByFields": ["qc_value desc"] + "orderByFields": ["qc_value asc"] }, "filterExpression": { "type": "True" @@ -12,24 +12,24 @@ }, "expectedQueryResult": [ { - "count": 10, - "qc_value": 0.98 + "count": 2, + "qc_value": null }, { "count": 10, - "qc_value": 0.97 + "qc_value": 0.89 }, { "count": 10, - "qc_value": 0.96 + "qc_value": 0.9 }, { "count": 10, - "qc_value": 0.95 + "qc_value": 0.91 }, { "count": 10, - "qc_value": 0.94 + "qc_value": 0.92 }, { "count": 10, @@ -37,19 +37,23 @@ }, { "count": 10, - "qc_value": 0.92 + "qc_value": 0.94 + }, + { + "count": 8, + "qc_value": 0.95 }, { "count": 10, - "qc_value": 0.91 + "qc_value": 0.96 }, { "count": 10, - "qc_value": 0.9 + "qc_value": 0.97 }, { "count": 10, - "qc_value": 0.89 + "qc_value": 0.98 } ] } diff --git a/endToEndTests/test/queries/OrderByFloatDesc.json b/endToEndTests/test/queries/OrderByFloatDesc.json new file mode 100644 index 000000000..7f96ff16e --- /dev/null +++ b/endToEndTests/test/queries/OrderByFloatDesc.json @@ -0,0 +1,59 @@ +{ + "testCaseName": "Order By QC float column descending", + "query": { + "action": { + "type": "Aggregated", + "groupByFields": ["qc_value"], + "orderByFields": ["qc_value desc"] + }, + "filterExpression": { + "type": "True" + } + }, + "expectedQueryResult": [ + { + "count": 10, + "qc_value": 0.98 + }, + { + "count": 10, + "qc_value": 0.97 + }, + { + "count": 10, + "qc_value": 0.96 + }, + { + "count": 8, + "qc_value": 0.95 + }, + { + 
"count": 10, + "qc_value": 0.94 + }, + { + "count": 10, + "qc_value": 0.93 + }, + { + "count": 10, + "qc_value": 0.92 + }, + { + "count": 10, + "qc_value": 0.91 + }, + { + "count": 10, + "qc_value": 0.9 + }, + { + "count": 10, + "qc_value": 0.89 + }, + { + "count": 2, + "qc_value": null + } + ] +} diff --git a/endToEndTests/test/queries/OrderByFloatFiltered.json b/endToEndTests/test/queries/OrderByFloatFiltered.json new file mode 100644 index 000000000..b9d69f17c --- /dev/null +++ b/endToEndTests/test/queries/OrderByFloatFiltered.json @@ -0,0 +1,58 @@ +{ + "testCaseName": "Order By QC float column ascending after filtering", + "query": { + "action": { + "type": "Aggregated", + "groupByFields": ["qc_value"], + "orderByFields": ["qc_value asc"] + }, + "filterExpression": { + "from": 0.1, + "to": 2121.1, + "column": "qc_value", + "type": "FloatBetween" + } + }, + "expectedQueryResult": [ + { + "count": 10, + "qc_value": 0.89 + }, + { + "count": 10, + "qc_value": 0.9 + }, + { + "count": 10, + "qc_value": 0.91 + }, + { + "count": 10, + "qc_value": 0.92 + }, + { + "count": 10, + "qc_value": 0.93 + }, + { + "count": 10, + "qc_value": 0.94 + }, + { + "count": 8, + "qc_value": 0.95 + }, + { + "count": 10, + "qc_value": 0.96 + }, + { + "count": 10, + "qc_value": 0.97 + }, + { + "count": 10, + "qc_value": 0.98 + } + ] +} diff --git a/endToEndTests/test/queries/dateBetween_duplicateValue2.json b/endToEndTests/test/queries/dateBetween_duplicateValue2.json index 80946bb79..27fe3b6e4 100644 --- a/endToEndTests/test/queries/dateBetween_duplicateValue2.json +++ b/endToEndTests/test/queries/dateBetween_duplicateValue2.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 59 + "count": 58 } ] } diff --git a/endToEndTests/test/queries/dateBetween_noBounds.json b/endToEndTests/test/queries/dateBetween_noBounds.json index 5da8b7b02..60372e6b0 100644 --- a/endToEndTests/test/queries/dateBetween_noBounds.json +++ b/endToEndTests/test/queries/dateBetween_noBounds.json @@ -13,7 
+13,7 @@ }, "expectedQueryResult": [ { - "count": 100 + "count": 99 } ] } diff --git a/endToEndTests/test/queries/dateBetween_null_excluded.json b/endToEndTests/test/queries/dateBetween_null_excluded.json new file mode 100644 index 000000000..43d2171cf --- /dev/null +++ b/endToEndTests/test/queries/dateBetween_null_excluded.json @@ -0,0 +1,19 @@ +{ + "testCaseName": "DateBetween Query from an early date with unbounded 'to'", + "query": { + "action": { + "type": "Aggregated" + }, + "filterExpression": { + "type": "DateBetween", + "column": "date", + "from": "2012-03-18", + "to": null + } + }, + "expectedQueryResult": [ + { + "count": 99 + } + ] +} diff --git a/endToEndTests/test/queries/dateBetween_openFrom1.json b/endToEndTests/test/queries/dateBetween_openFrom1.json index 9c7efbfb5..36a0d818c 100644 --- a/endToEndTests/test/queries/dateBetween_openFrom1.json +++ b/endToEndTests/test/queries/dateBetween_openFrom1.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 52 + "count": 51 } ] } diff --git a/endToEndTests/test/queries/dateBetween_openFrom2.json b/endToEndTests/test/queries/dateBetween_openFrom2.json index 4aeddf577..2c1da304b 100644 --- a/endToEndTests/test/queries/dateBetween_openFrom2.json +++ b/endToEndTests/test/queries/dateBetween_openFrom2.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 53 + "count": 52 } ] } diff --git a/endToEndTests/test/queries/dateBetween_openFrom3.json b/endToEndTests/test/queries/dateBetween_openFrom3.json index f922a847e..656616b83 100644 --- a/endToEndTests/test/queries/dateBetween_openFrom3.json +++ b/endToEndTests/test/queries/dateBetween_openFrom3.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 54 + "count": 53 } ] } diff --git a/endToEndTests/test/queries/floatBetween_openTo.json b/endToEndTests/test/queries/floatBetween_openTo.json index 7834ef1c1..ff8a872aa 100644 --- a/endToEndTests/test/queries/floatBetween_openTo.json +++ 
b/endToEndTests/test/queries/floatBetween_openTo.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 80 + "count": 78 } ] } diff --git a/endToEndTests/test/queries/intBetween.json b/endToEndTests/test/queries/intBetween.json index 246bac32f..4b203fb02 100644 --- a/endToEndTests/test/queries/intBetween.json +++ b/endToEndTests/test/queries/intBetween.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 25 + "count": 33 } ] } diff --git a/endToEndTests/test/queries/intBetween_noBounds.json b/endToEndTests/test/queries/intBetween_noBounds.json index d1b230728..b8a507988 100644 --- a/endToEndTests/test/queries/intBetween_noBounds.json +++ b/endToEndTests/test/queries/intBetween_noBounds.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 100 + "count": 98 } ] } diff --git a/endToEndTests/test/queries/intBetween_openFrom.json b/endToEndTests/test/queries/intBetween_openFrom.json index ed7668258..45ee971a9 100644 --- a/endToEndTests/test/queries/intBetween_openFrom.json +++ b/endToEndTests/test/queries/intBetween_openFrom.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 29 + "count": 36 } ] } diff --git a/endToEndTests/test/queries/intBetween_openTo.json b/endToEndTests/test/queries/intBetween_openTo.json index 4d4fe1af5..52d7e31f4 100644 --- a/endToEndTests/test/queries/intBetween_openTo.json +++ b/endToEndTests/test/queries/intBetween_openTo.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 46 + "count": 45 } ] } diff --git a/endToEndTests/test/queries/intEquals.json b/endToEndTests/test/queries/intEquals.json index 0d1f19fc6..ffcf93e35 100644 --- a/endToEndTests/test/queries/intEquals.json +++ b/endToEndTests/test/queries/intEquals.json @@ -12,7 +12,7 @@ }, "expectedQueryResult": [ { - "count": 9 + "count": 8 } ] } diff --git a/include/silo/common/date.h b/include/silo/common/date.h index fe68ff972..b6b911a3a 100644 --- a/include/silo/common/date.h +++ b/include/silo/common/date.h @@ -1,6 +1,7 @@ #ifndef 
SILO_DATE_H #define SILO_DATE_H +#include #include namespace silo::common { @@ -9,7 +10,7 @@ typedef uint32_t Date; silo::common::Date stringToDate(const std::string& value); -std::string dateToString(silo::common::Date date); +std::optional dateToString(silo::common::Date date); } // namespace silo::common diff --git a/include/silo/common/nulltype.h b/include/silo/common/nulltype.h new file mode 100644 index 000000000..2b1bc6811 --- /dev/null +++ b/include/silo/common/nulltype.h @@ -0,0 +1,10 @@ +#ifndef SILO_NULLTYPE_H +#define SILO_NULLTYPE_H + +namespace silo { + +struct NullType {}; + +} // namespace silo + +#endif // SILO_NULLTYPE_H diff --git a/include/silo/query_engine/query_result.h b/include/silo/query_engine/query_result.h index ff2bdd17a..55f388a1f 100644 --- a/include/silo/query_engine/query_result.h +++ b/include/silo/query_engine/query_result.h @@ -1,16 +1,19 @@ #ifndef SILO_QUERY_ENGINE_RESULT_H #define SILO_QUERY_ENGINE_RESULT_H +#include #include #include #include #include +#include "silo/common/nulltype.h" + namespace silo::query_engine { struct QueryResultEntry { - std::map> fields; + std::map>> fields; }; struct QueryResult { diff --git a/src/silo/common/date.cpp b/src/silo/common/date.cpp index 64a7338bd..61c782876 100644 --- a/src/silo/common/date.cpp +++ b/src/silo/common/date.cpp @@ -1,6 +1,5 @@ #include "silo/common/date.h" -#include #include #include #include @@ -8,6 +7,9 @@ #include "silo/common/date_format_exception.h" silo::common::Date silo::common::stringToDate(const std::string& value) { + if (value.empty()) { + return 0; + } auto split_position = value.find('-', 0); if (split_position == value.size()) { throw DateFormatException("Expect dates to be delimited by '-': " + value); @@ -39,7 +41,10 @@ silo::common::Date silo::common::stringToDate(const std::string& value) { } } -std::string silo::common::dateToString(silo::common::Date date) { +std::optional silo::common::dateToString(silo::common::Date date) { + if (date == 0) { + 
return std::nullopt; + } // Date is stored with the year in the upper 16 bits, month in bits [12,16), and day [0,12) const uint32_t year = date >> 16; const uint32_t month = (date >> 12) & 0xF; diff --git a/src/silo/common/date.test.cpp b/src/silo/common/date.test.cpp index 24469c1fc..12add53f1 100644 --- a/src/silo/common/date.test.cpp +++ b/src/silo/common/date.test.cpp @@ -9,10 +9,11 @@ TEST(Date, correctlyParsesDate) { EXPECT_EQ(silo::common::stringToDate("2023-1-01"), (2023 << 16) + (1 << 12) + 1); EXPECT_EQ(silo::common::stringToDate("2010-12-03"), (2010 << 16) + (12 << 12) + 3); EXPECT_EQ(silo::common::stringToDate("12-12-12"), (12 << 16) + (12 << 12) + 12); + EXPECT_EQ(silo::common::stringToDate(""), 0); } TEST(Date, throwsExceptionOnWrongDates) { - EXPECT_THROW(silo::common::stringToDate(""), silo::common::DateFormatException); + EXPECT_THROW(silo::common::stringToDate("?"), silo::common::DateFormatException); EXPECT_THROW(silo::common::stringToDate("----"), silo::common::DateFormatException); EXPECT_THROW(silo::common::stringToDate("31-31-31"), silo::common::DateFormatException); EXPECT_THROW(silo::common::stringToDate("-1-"), silo::common::DateFormatException); @@ -28,4 +29,6 @@ TEST(Date, correctlyReprintsStrings) { EXPECT_EQ(silo::common::dateToString(silo::common::stringToDate("2020-01-01")), "2020-01-01"); EXPECT_EQ(silo::common::dateToString(silo::common::stringToDate("2023-01-1")), "2023-01-01"); EXPECT_EQ(silo::common::dateToString(silo::common::stringToDate("2010-12-3")), "2010-12-03"); + + EXPECT_EQ(silo::common::dateToString(silo::common::stringToDate("")), std::nullopt); } diff --git a/src/silo/common/string.test.cpp b/src/silo/common/string.test.cpp index 7fad466c1..6945fabf5 100644 --- a/src/silo/common/string.test.cpp +++ b/src/silo/common/string.test.cpp @@ -14,6 +14,13 @@ TEST(String, correctToString) { EXPECT_EQ(underTest.toString(dict), "value 1"); } +TEST(String, correctWithEmptyString) { + BidirectionalMap dict; + String underTest("", 
dict); + + EXPECT_EQ(underTest.toString(dict), ""); +} + TEST(String, correctToStringLong) { BidirectionalMap dict; String underTest("some longer value 1", dict); diff --git a/src/silo/preprocessing/pango_lineage_count.test.cpp b/src/silo/preprocessing/pango_lineage_count.test.cpp index c5e9bbed5..f03123146 100644 --- a/src/silo/preprocessing/pango_lineage_count.test.cpp +++ b/src/silo/preprocessing/pango_lineage_count.test.cpp @@ -18,10 +18,12 @@ TEST(PangoLineageCounts, buildPangoLineageCounts) { ); ASSERT_EQ(result.pango_lineage_counts.size(), 24); - ASSERT_EQ(result.pango_lineage_counts[0].pango_lineage, "B.1"); - ASSERT_EQ(result.pango_lineage_counts[0].count_of_sequences, 3); - ASSERT_EQ(result.pango_lineage_counts[6].pango_lineage, "B.1.1.7"); - ASSERT_EQ(result.pango_lineage_counts[6].count_of_sequences, 48); + ASSERT_EQ(result.pango_lineage_counts[0].pango_lineage, ""); + ASSERT_EQ(result.pango_lineage_counts[0].count_of_sequences, 1); + ASSERT_EQ(result.pango_lineage_counts[1].pango_lineage, "B.1"); + ASSERT_EQ(result.pango_lineage_counts[1].count_of_sequences, 3); + ASSERT_EQ(result.pango_lineage_counts[7].pango_lineage, "B.1.1.7"); + ASSERT_EQ(result.pango_lineage_counts[7].count_of_sequences, 48); ASSERT_EQ(result.pango_lineage_counts[23].pango_lineage, "B.1.617.2.9.2"); ASSERT_EQ(result.pango_lineage_counts[23].count_of_sequences, 1); } diff --git a/src/silo/query_engine/actions/aggregated.cpp b/src/silo/query_engine/actions/aggregated.cpp index 97984e67b..7030f1d59 100644 --- a/src/silo/query_engine/actions/aggregated.cpp +++ b/src/silo/query_engine/actions/aggregated.cpp @@ -1,6 +1,7 @@ #include "silo/query_engine/actions/aggregated.h" #include +#include #include #include #include @@ -22,6 +23,8 @@ namespace silo::query_engine::actions { +using json_value_type = std::optional>; + size_t getTupleSize(std::vector& group_by_metadata) { size_t size = 0; for (const auto& metadata : group_by_metadata) { @@ -83,8 +86,8 @@ struct Tuple { } } - std::map> 
getFields() const { - std::map> fields; + [[nodiscard]] std::map getFields() const { + std::map fields; const char* data_pointer = data.data(); for (const auto& metadata : columns.metadata) { if (metadata.getColumnType() == config::ColumnType::DATE) { @@ -93,26 +96,49 @@ struct Tuple { data_pointer += sizeof(decltype(value)); } else if (metadata.getColumnType() == config::ColumnType::INT) { const int32_t value = *reinterpret_cast(data_pointer); - fields[metadata.name] = value; + if (value == INT32_MIN) { + fields[metadata.name] = std::nullopt; + } else { + fields[metadata.name] = value; + } data_pointer += sizeof(decltype(value)); } else if (metadata.getColumnType() == config::ColumnType::FLOAT) { const double value = *reinterpret_cast(data_pointer); - fields[metadata.name] = value; + if (std::isnan(value)) { + fields[metadata.name] = std::nullopt; + } else { + fields[metadata.name] = value; + } data_pointer += sizeof(decltype(value)); } else if (metadata.getColumnType() == config::ColumnType::STRING) { const common::String value = *reinterpret_cast*>(data_pointer); - fields[metadata.name] = columns.string_columns.at(metadata.name).lookupValue(value); + std::string string_value = columns.string_columns.at(metadata.name).lookupValue(value); + if (string_value.empty()) { + fields[metadata.name] = std::nullopt; + } else { + fields[metadata.name] = string_value; + } data_pointer += sizeof(decltype(value)); } else if (metadata.getColumnType() == config::ColumnType::INDEXED_PANGOLINEAGE) { const silo::Idx value = *reinterpret_cast(data_pointer); - fields[metadata.name] = + std::string string_value = columns.pango_lineage_columns.at(metadata.name).lookupValue(value).value; + if (string_value.empty()) { + fields[metadata.name] = std::nullopt; + } else { + fields[metadata.name] = string_value; + } data_pointer += sizeof(decltype(value)); } else if (metadata.getColumnType() == config::ColumnType::INDEXED_STRING) { const silo::Idx value = *reinterpret_cast(data_pointer); - 
fields[metadata.name] = + std::string string_value = columns.indexed_string_columns.at(metadata.name).lookupValue(value); + if (string_value.empty()) { + fields[metadata.name] = std::nullopt; + } else { + fields[metadata.name] = string_value; + } data_pointer += sizeof(decltype(value)); } else { throw std::runtime_error("Unchecked column type of column " + metadata.name); @@ -173,14 +199,13 @@ void applyOrderByAndLimit( const std::vector& order_by_fields, std::optional limit ) { - auto cmp = [&order_by_fields](const QueryResultEntry& value1, const QueryResultEntry& value2) { + auto cmp = [&order_by_fields](const QueryResultEntry& entry1, const QueryResultEntry& entry2) { for (const OrderByField& field : order_by_fields) { - if (value1.fields.at(field.name) < value2.fields.at(field.name)) { - return field.ascending; - } - if (value2.fields.at(field.name) < value1.fields.at(field.name)) { - return !field.ascending; + if (entry1.fields.at(field.name) == entry2.fields.at(field.name)) { + continue; } + return entry1.fields.at(field.name) < entry2.fields.at(field.name) ? 
field.ascending + : !field.ascending; } return false; }; @@ -197,7 +222,7 @@ std::vector generateResult(std::unordered_map std::vector result; result.reserve(tuple_counts.size()); for (auto& [tuple, count] : tuple_counts) { - std::map> fields = tuple.getFields(); + std::map fields = tuple.getFields(); fields[COUNT_FIELD] = static_cast(count); result.push_back({fields}); } @@ -209,7 +234,7 @@ QueryResult aggregateWithoutGrouping(const std::vector& bitmap_f for (const auto& filter : bitmap_filters) { count += filter->cardinality(); }; - std::map> tuple_fields; + std::map tuple_fields; tuple_fields[COUNT_FIELD] = static_cast(count); return QueryResult{std::vector{{tuple_fields}}}; } diff --git a/src/silo/query_engine/actions/details.cpp b/src/silo/query_engine/actions/details.cpp index bdeee5169..9d5dd6a85 100644 --- a/src/silo/query_engine/actions/details.cpp +++ b/src/silo/query_engine/actions/details.cpp @@ -40,7 +40,8 @@ QueryResult Details::execute( const auto& bitmap = bitmap_filter[partition_id]; const auto& columns = database.partitions[partition_id].columns; for (const Idx sequence_id : *bitmap) { - std::map> row_fields; + std::map>> + row_fields; for (const auto& metadata : field_metadata) { if (metadata.getColumnType() == config::ColumnType::DATE) { const common::Date value = @@ -48,23 +49,46 @@ QueryResult Details::execute( row_fields[metadata.name] = common::dateToString(value); } else if (metadata.getColumnType() == config::ColumnType::INT) { const int32_t value = columns.int_columns.at(metadata.name).getValues()[sequence_id]; - row_fields[metadata.name] = value; + if (value == INT32_MIN) { + row_fields[metadata.name] = std::nullopt; + } else { + row_fields[metadata.name] = value; + } } else if (metadata.getColumnType() == config::ColumnType::FLOAT) { const double value = columns.float_columns.at(metadata.name).getValues()[sequence_id]; - row_fields[metadata.name] = value; + if (std::isnan(value)) { + row_fields[metadata.name] = std::nullopt; + } 
else { + row_fields[metadata.name] = value; + } } else if (metadata.getColumnType() == config::ColumnType::STRING) { const auto& column = columns.string_columns.at(metadata.name); const common::String value = column.getValues()[sequence_id]; - row_fields[metadata.name] = column.lookupValue(value); + std::string string_value = column.lookupValue(value); + if (string_value.empty()) { + row_fields[metadata.name] = std::nullopt; + } else { + row_fields[metadata.name] = string_value; + } } else if (metadata.getColumnType() == config::ColumnType::INDEXED_PANGOLINEAGE) { const auto& column = columns.pango_lineage_columns.at(metadata.name); const silo::Idx value = column.getValues()[sequence_id]; - row_fields[metadata.name] = column.lookupValue(value).value; + std::string string_value = column.lookupValue(value).value; + if (string_value.empty()) { + row_fields[metadata.name] = std::nullopt; + } else { + row_fields[metadata.name] = string_value; + } } else if (metadata.getColumnType() == config::ColumnType::INDEXED_STRING) { const auto& column = columns.indexed_string_columns.at(metadata.name); const silo::Idx value = column.getValues()[sequence_id]; - row_fields[metadata.name] = column.lookupValue(value); + std::string string_value = column.lookupValue(value); + if (string_value.empty()) { + row_fields[metadata.name] = std::nullopt; + } else { + row_fields[metadata.name] = string_value; + } } else { throw std::runtime_error("Unchecked column type of column " + metadata.name); } diff --git a/src/silo/query_engine/actions/nuc_mutations.cpp b/src/silo/query_engine/actions/nuc_mutations.cpp index 896ae3bf8..95fd0cc0d 100644 --- a/src/silo/query_engine/actions/nuc_mutations.cpp +++ b/src/silo/query_engine/actions/nuc_mutations.cpp @@ -161,15 +161,17 @@ QueryResult NucMutations::execute( count_of_mutations_per_position[static_cast(symbol)][pos]; if (count > threshold_count) { const double proportion = static_cast(count) / static_cast(total); - const std::map> fields{ - 
{"position", - SYMBOL_REPRESENTATION[static_cast( - symbol_in_reference_genome.value_or(NUCLEOTIDE_SYMBOL::N) - )] + - std::to_string(pos + 1) + - SYMBOL_REPRESENTATION[static_cast(symbol)]}, - {"proportion", proportion}, - {"count", static_cast(count)}}; + const std:: + map>> + fields{ + {"position", + SYMBOL_REPRESENTATION[static_cast( + symbol_in_reference_genome.value_or(NUCLEOTIDE_SYMBOL::N) + )] + + std::to_string(pos + 1) + + SYMBOL_REPRESENTATION[static_cast(symbol)]}, + {"proportion", proportion}, + {"count", static_cast(count)}}; mutation_proportions.push_back({fields}); } } diff --git a/src/silo/query_engine/filter_expressions/date_between.cpp b/src/silo/query_engine/filter_expressions/date_between.cpp index 5ac9e256a..1228d664e 100644 --- a/src/silo/query_engine/filter_expressions/date_between.cpp +++ b/src/silo/query_engine/filter_expressions/date_between.cpp @@ -25,9 +25,13 @@ DateBetween::DateBetween( std::string DateBetween::toString(const silo::Database& /*database*/) const { std::string res = "[Date-between "; - res += (date_from.has_value() ? silo::common::dateToString(date_from.value()) : "unbounded"); + res += + (date_from.has_value() ? silo::common::dateToString(date_from.value()).value_or("") + : "unbounded"); res += " and "; - res += (date_to.has_value() ? silo::common::dateToString(date_to.value()) : "unbounded"); + res += + (date_to.has_value() ? 
silo::common::dateToString(date_to.value()).value_or("") : "unbounded" + ); res += "]"; return res; } @@ -44,7 +48,7 @@ std::unique_ptr DateBetween::compile( children.emplace_back(std::make_unique>( date_column.getValues(), operators::Selection::HIGHER_OR_EQUALS, - date_from.value_or(silo::common::Date{0}), + date_from.value_or(silo::common::Date{1}), database_partition.sequenceCount )); children.emplace_back(std::make_unique>( @@ -78,8 +82,8 @@ std::vector DateBetween:: for (const auto& chunk : chunks) { const auto* begin = &date_column.getValues()[chunk.offset]; const auto* end = &date_column.getValues()[chunk.offset + chunk.count_of_sequences]; - const auto* lower = - date_from.has_value() ? std::lower_bound(begin, end, date_from.value()) : begin; + // If lower bound is empty we use 1 as the lower-bound, as 0 represents NULL values + const auto* lower = std::lower_bound(begin, end, date_from.value_or(1)); uint32_t const lower_index = lower - base; const auto* upper = date_to.has_value() ? 
std::upper_bound(begin, end, date_to.value()) : end; uint32_t const upper_index = upper - base; @@ -98,13 +102,13 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filter) ) CHECK_SILO_QUERY(json.contains("from"), "The field 'from' is required in DateBetween expression") CHECK_SILO_QUERY( - json["from"].is_null() || json["from"].is_string(), - "The field 'from' in a DateBetween expression needs to be a string or null" + json["from"].is_null() || (json["from"].is_string() && !json["from"].get<std::string>().empty()), + "The field 'from' in a DateBetween expression needs to be a non-empty string or null" ) CHECK_SILO_QUERY(json.contains("to"), "The field 'to' is required in a DateBetween expression") CHECK_SILO_QUERY( - json["to"].is_null() || json["to"].is_string(), - "The field 'to' in a DateBetween expression needs to be a string or null" + json["to"].is_null() || (json["to"].is_string() && !json["to"].get<std::string>().empty()), + "The field 'to' in a DateBetween expression needs to be a non-empty string or null" ) const std::string& column = json["column"]; std::optional date_from; diff --git a/src/silo/query_engine/filter_expressions/float_equals.cpp b/src/silo/query_engine/filter_expressions/float_equals.cpp index 63b0b0d9f..84847e849 100644 --- a/src/silo/query_engine/filter_expressions/float_equals.cpp +++ b/src/silo/query_engine/filter_expressions/float_equals.cpp @@ -1,5 +1,7 @@ #include "silo/query_engine/filter_expressions/float_equals.h" +#include + #include #include "silo/database.h" @@ -19,9 +21,9 @@ std::string FloatEquals::toString(const silo::Database& /*database*/) const { } std::unique_ptr FloatEquals::compile( - const silo::Database& database, + const silo::Database& /*database*/, const silo::DatabasePartition& database_partition, - silo::query_engine::filter_expressions::Expression::AmbiguityMode mode + silo::query_engine::filter_expressions::Expression::AmbiguityMode /*mode*/ ) const { if (!database_partition.columns.float_columns.contains(column)) { return std::make_unique(database_partition.sequenceCount); @@ -48,11 +50,11 @@ void from_json(const 
nlohmann::json& json, std::unique_ptr& filter) json.contains("value"), "The field 'value' is required in an FloatEquals expression" ) CHECK_SILO_QUERY( - json["value"].is_number_float(), + json["value"].is_number_float() || json["value"].is_null(), "The field 'value' in an FloatEquals expression must be a float" ) const std::string& column = json["column"]; - const double& value = json["value"]; + const double& value = json["value"].is_null() ? std::nan("") : json["value"].get(); filter = std::make_unique(column, value); } diff --git a/src/silo/query_engine/filter_expressions/int_between.cpp b/src/silo/query_engine/filter_expressions/int_between.cpp index 3be82dfc1..3b8540600 100644 --- a/src/silo/query_engine/filter_expressions/int_between.cpp +++ b/src/silo/query_engine/filter_expressions/int_between.cpp @@ -33,12 +33,12 @@ std::unique_ptr IntBetween::compile( children.emplace_back(std::make_unique>( int_column.getValues(), operators::Selection::HIGHER_OR_EQUALS, - from.value_or(0), + from.value_or(INT32_MIN + 1), database_partition.sequenceCount )); children.emplace_back(std::make_unique>( int_column.getValues(), - operators::Selection::LESS, + operators::Selection::LESS_OR_EQUALS, to.value_or(INT32_MAX), database_partition.sequenceCount )); diff --git a/src/silo/query_engine/filter_expressions/int_equals.cpp b/src/silo/query_engine/filter_expressions/int_equals.cpp index 6ceeeb9ad..8fbaf65d6 100644 --- a/src/silo/query_engine/filter_expressions/int_equals.cpp +++ b/src/silo/query_engine/filter_expressions/int_equals.cpp @@ -49,11 +49,11 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filter) { json.contains("value"), "The field 'value' is required in an IntEquals expression" ) CHECK_SILO_QUERY( - json["value"].is_number_integer(), - "The field 'value' in an IntEquals expression must be an integer" + json["value"].is_number_integer() || json["value"].is_null(), + "The field 'value' in an IntEquals expression must be an integer or null" ) const 
std::string& column = json["column"]; - const int32_t& value = json["value"]; + const int32_t& value = json["value"].is_null() ? INT32_MIN : json["value"].get(); filter = std::make_unique(column, value); } diff --git a/src/silo/query_engine/filter_expressions/string_equals.cpp b/src/silo/query_engine/filter_expressions/string_equals.cpp index 0fa10d383..957ccd4f9 100644 --- a/src/silo/query_engine/filter_expressions/string_equals.cpp +++ b/src/silo/query_engine/filter_expressions/string_equals.cpp @@ -67,11 +67,11 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filter json.contains("value"), "The field 'value' is required in an StringEquals expression" ) CHECK_SILO_QUERY( - json["value"].is_string(), - "The field 'value' in an StringEquals expression needs to be a string" + json["value"].is_string() || json["value"].is_null(), + "The field 'value' in an StringEquals expression needs to be a string or null" ) const std::string& column = json["column"]; - const std::string& value = json["value"]; + const std::string& value = json["value"].is_null() ? 
"" : json["value"].get(); filter = std::make_unique(column, value); } diff --git a/src/silo/query_engine/query_result.cpp b/src/silo/query_engine/query_result.cpp index fdbb19d7b..4a8a347e9 100644 --- a/src/silo/query_engine/query_result.cpp +++ b/src/silo/query_engine/query_result.cpp @@ -16,7 +16,11 @@ void to_json(nlohmann::json& json, const QueryResult& query_result) { // NOLINTNEXTLINE(readability-identifier-naming) void to_json(nlohmann::json& json, const QueryResultEntry& result_entry) { for (const auto& [field, value] : result_entry.fields) { - json[field] = value; + if (value.has_value()) { + json[field] = value.value(); + } else { + json[field] = nlohmann::json(); + } } } diff --git a/src/silo/storage/column/date_column.test.cpp b/src/silo/storage/column/date_column.test.cpp index ee336ec73..97f2203c6 100644 --- a/src/silo/storage/column/date_column.test.cpp +++ b/src/silo/storage/column/date_column.test.cpp @@ -10,4 +10,5 @@ TEST(DateColumn, insertValues) { under_test.insert(silo::common::stringToDate("2021-12-03")); under_test.insert(silo::common::stringToDate("2025-01-01")); under_test.insert(silo::common::stringToDate("2021-03-21")); + under_test.insert(silo::common::stringToDate("")); } diff --git a/src/silo/storage/column_group.cpp b/src/silo/storage/column_group.cpp index dcc671469..546d181cd 100644 --- a/src/silo/storage/column_group.cpp +++ b/src/silo/storage/column_group.cpp @@ -32,9 +32,11 @@ unsigned ColumnGroup::fill( } else if (column_type == silo::config::ColumnType::DATE) { date_columns.at(item.name).insert(common::stringToDate(value)); } else if (column_type == silo::config::ColumnType::INT) { - int_columns.at(item.name).insert(std::stoi(value)); + const int32_t int_value = value.empty() ? INT32_MIN : std::stoi(value); + int_columns.at(item.name).insert(int_value); } else if (column_type == silo::config::ColumnType::FLOAT) { - float_columns.at(item.name).insert(std::stod(value)); + const double double_value = value.empty() ? 
std::nan("") : std::stod(value); + float_columns.at(item.name).insert(double_value); } } ++sequence_count; diff --git a/src/silo_api/request_handler_factory.test.cpp b/src/silo_api/request_handler_factory.test.cpp index 4ff403ca6..d6188ee76 100644 --- a/src/silo_api/request_handler_factory.test.cpp +++ b/src/silo_api/request_handler_factory.test.cpp @@ -103,7 +103,8 @@ TEST_F(RequestHandlerTestFixture, returnsMethodNotAllowedOnPostInfoRequest) { } TEST_F(RequestHandlerTestFixture, handlesPostQueryRequest) { - std::map> fields{{"count", 5}}; + std::map>> fields{ + {"count", 5}}; std::vector tmp{{fields}}; const silo::query_engine::QueryResult query_result{tmp}; EXPECT_CALL(mock_query_engine, executeQuery).WillRepeatedly(testing::Return(query_result)); diff --git a/testBaseData/small_metadata_set.tsv b/testBaseData/small_metadata_set.tsv index 9a69bb713..3b5becdee 100644 --- a/testBaseData/small_metadata_set.tsv +++ b/testBaseData/small_metadata_set.tsv @@ -1,17 +1,17 @@ gisaid_epi_isl pango_lineage date region country division unsorted_date age qc_value -EPI_ISL_1408408 B.1.1.7 2021-03-18 Europe Switzerland Basel-Land 2021-01-15 4 0.98 +EPI_ISL_1408408 B.1.1.7 2021-03-18 Europe Switzerland Basel-Land 4 0.98 EPI_ISL_1749899 B.1.1.7 2021-04-13 Europe Switzerland Bern 2020-03-08 5 0.97 EPI_ISL_2016901 B.1.1.7 2021-04-25 Europe Switzerland Aargau 2021-01-29 6 0.96 EPI_ISL_1749892 B.1.1.7 2021-04-13 Europe Switzerland Bern 2020-12-24 4 0.95 EPI_ISL_1597932 B.1.1.7 2021-03-19 Europe Switzerland Solothurn 2021-02-10 54 0.94 -EPI_ISL_1407962 B.1.1.7 2021-03-15 Europe Switzerland Solothurn 2021-01-16 55 0.93 +EPI_ISL_1407962 B.1.1.7 Europe Switzerland Solothurn 2021-01-16 55 0.93 EPI_ISL_1750503 B.1.258.17 2020-12-24 Europe Switzerland Zürich 2021-02-14 56 0.92 EPI_ISL_1360935 B.1.1.7 2021-03-08 Europe Switzerland Jura 2021-01-03 57 0.91 EPI_ISL_2019235 B.1.1.7 2021-04-28 Europe Switzerland Basel-Stadt 2021-01-22 58 0.90 EPI_ISL_1749960 B.1.1.7 2021-04-15 Europe Switzerland 
Basel-Land 2021-02-03 59 0.89 EPI_ISL_1361468 B.1.1.7 2021-03-06 Europe Switzerland Zürich 2021-01-20 50 0.98 EPI_ISL_1408062 B.1.1.7 2021-03-03 Europe Switzerland Valais 2020-11-24 50 0.97 -EPI_ISL_1597890 B.1.1.7 2021-03-21 Europe Switzerland Vaud 2021-01-25 51 0.96 +EPI_ISL_1597890 B.1.1.7 2021-03-21 "" Switzerland Vaud 2021-01-25 51 0.96 EPI_ISL_1682849 B.1.236 2020-12-17 Europe Switzerland Thurgau 2021-01-21 52 0.95 EPI_ISL_1408805 B.1.221 2020-11-24 Europe Switzerland Schwyz 2020-12-09 53 0.94 EPI_ISL_1750868 B.1.1.189 2020-12-15 Europe Switzerland Solothurn 2021-01-20 54 0.93 @@ -22,7 +22,7 @@ EPI_ISL_2214128 B.1.1.7 2021-05-10 Europe Switzerland Geneva 2020-11-13 58 0.89 EPI_ISL_2408472 B.1.1.7 2021-05-25 Europe Switzerland Obwalden 2021-03-02 59 0.98 EPI_ISL_830864 B.1.177 2020-10-08 Europe Switzerland Basel-Stadt 2021-03-03 50 0.97 EPI_ISL_581968 B.1.160 2020-08-17 Europe Switzerland Basel-Stadt 2021-03-25 50 0.96 -EPI_ISL_2213804 Q.7 2021-05-08 Europe Switzerland Geneva 2021-04-12 51 0.95 +EPI_ISL_2213804 Q.7 2021-05-08 Europe Switzerland Geneva 2021-04-12 51 "" EPI_ISL_2405276 B.1.1.7 2021-05-24 Europe Switzerland Vaud 2021-04-28 52 0.94 EPI_ISL_2213934 B.1.1.7 2021-05-13 Europe Switzerland Geneva 2021-04-23 53 0.93 EPI_ISL_2213984 B.1.1.7 2021-05-08 Europe Switzerland Geneva 2021-05-09 54 0.92 @@ -43,7 +43,7 @@ EPI_ISL_1003036 B.1.177 2021-01-16 Europe Switzerland Aargau 2021-07-14 57 0.98 EPI_ISL_899762 B.1.177 2020-12-25 Europe Switzerland Schwyz 2021-07-19 58 0.97 EPI_ISL_899725 B.1.177 2021-01-12 Europe Switzerland Solothurn 2021-07-14 59 0.96 EPI_ISL_1195052 B.1.1.7 2021-02-23 Europe Switzerland Solothurn 2021-07-04 50 0.95 -EPI_ISL_1003519 B.1.160.16 2021-01-22 Europe Switzerland Solothurn 2021-07-29 50 0.94 +EPI_ISL_1003519 B.1.160.16 2021-01-22 Europe Switzerland "" 2021-07-29 50 0.94 EPI_ISL_1003010 B.1.36.35 2021-01-15 Europe Switzerland Solothurn 2021-07-19 51 0.93 EPI_ISL_1119584 B.1.1 2020-11-04 Europe Switzerland Solothurn 2021-07-05 52 
0.92 EPI_ISL_1002052 B.1 2021-01-15 Europe Switzerland Solothurn 2021-07-15 53 0.91 @@ -53,7 +53,7 @@ EPI_ISL_768148 B.1.160 2020-12-24 Europe Switzerland Sankt Gallen 2020-03-16 56 EPI_ISL_1080536 B.1.1.7 2021-02-10 Europe Switzerland Basel-Land 2021-08-04 57 0.97 EPI_ISL_1002156 B.1.221 2021-01-16 Europe Switzerland Basel-Land 2021-02-03 58 0.96 EPI_ISL_1119315 B.1.1.7 2021-02-14 Europe Switzerland Graubünden 2021-03-18 59 0.95 -EPI_ISL_1004495 B.1.177.44 2021-01-03 Europe Switzerland Bern 2021-04-13 50 0.94 +EPI_ISL_1004495 B.1.177.44 2021-01-03 Europe Switzerland "" 2021-04-13 50 0.94 EPI_ISL_1001920 B.1.177 2021-01-22 Europe Switzerland Bern 2021-04-25 50 0.93 EPI_ISL_1131102 B.1.160 2021-02-03 Europe Switzerland Zürich 2021-04-13 51 0.92 EPI_ISL_1003373 B.1.177 2021-01-20 Europe Switzerland Zürich 2021-03-19 52 0.91 @@ -66,7 +66,7 @@ EPI_ISL_1260480 B.1.160 2020-12-21 Europe Switzerland Zürich 2021-03-06 58 0.95 EPI_ISL_1747885 B.1.1.7 2021-03-09 Europe Switzerland Solothurn 2021-03-03 59 0.94 EPI_ISL_1747752 B.1.1.7 2021-03-05 Europe Switzerland Basel-Land 2021-03-21 50 0.93 EPI_ISL_1005148 B.1.221 2020-11-13 Europe Switzerland Solothurn 2020-12-17 50 0.92 -EPI_ISL_1748243 B.1.1.7 2021-03-02 Europe Switzerland Solothurn 2020-11-24 51 0.91 +EPI_ISL_1748243 B.1.1.7 2021-03-02 Europe Switzerland Solothurn 2020-11-24 "" 0.91 EPI_ISL_1748215 B.1.1.7 2021-03-03 Europe Switzerland Solothurn 2020-12-15 52 0.90 EPI_ISL_1748395 B.1.1.7 2021-03-25 Europe Switzerland Basel-Stadt 2021-04-27 53 0.89 EPI_ISL_1760534 B.1.1.7 2021-04-12 Europe Switzerland Ticino 2021-04-23 54 0.98 @@ -82,7 +82,7 @@ EPI_ISL_2307766 B.1.1.7 2021-05-11 Europe Switzerland Bern 2021-05-08 52 0.89 EPI_ISL_2375490 B.1.1.7 2021-05-10 Europe Switzerland Valais 2021-06-10 53 0.98 EPI_ISL_2374969 B.1.1.7 2021-05-18 Europe Switzerland Aargau 2021-06-05 54 0.97 EPI_ISL_2307888 B.1.1.7 2021-05-08 Europe Switzerland Solothurn 2021-05-23 55 0.96 -EPI_ISL_2375247 B.1.1.7 2021-05-14 Europe Switzerland Sankt 
Gallen 2021-05-11 56 0.95 +EPI_ISL_2375247 B.1.1.7 2021-05-14 Europe Switzerland Sankt Gallen 2021-05-11 56 "" EPI_ISL_2308054 B.1.1.7 2021-05-07 Europe Switzerland Zürich 2020-12-09 57 0.94 EPI_ISL_2375165 B.1.1.7 2021-05-18 Europe Switzerland Basel-Land 2020-10-28 58 0.93 EPI_ISL_2375097 B.1.1.7 2021-05-16 Europe Switzerland Basel-Land 2020-10-22 59 0.92 @@ -92,8 +92,8 @@ EPI_ISL_3086369 AY.122 2021-07-14 Europe Switzerland Ticino 2020-12-14 51 0.89 EPI_ISL_3259931 AY.43 2021-07-04 Europe Switzerland Vaud 2020-12-29 52 0.98 EPI_ISL_3267832 AY.43 2021-07-29 Europe Switzerland Bern 2021-01-25 53 0.97 EPI_ISL_3128796 B.1.617.2 2021-07-19 Europe Switzerland Zürich 2020-12-13 54 0.96 -EPI_ISL_3016465 B.1.1.7 2021-07-05 Europe Switzerland Valais 2021-01-16 55 0.95 -EPI_ISL_3247294 AY.42 2021-07-15 Europe Switzerland Basel-Stadt 2020-12-25 56 0.94 +EPI_ISL_3016465 B.1.1.7 2021-07-05 Europe Switzerland Valais 2021-01-16 "" 0.95 +EPI_ISL_3247294 "" 2021-07-15 Europe Switzerland Basel-Stadt 2020-12-25 56 0.94 EPI_ISL_3578231 P.1 2021-05-12 Europe Switzerland Zürich 2021-01-12 57 0.93 EPI_ISL_3465732 AY.43 2021-08-05 Europe Switzerland Vaud 2021-02-23 58 0.92 EPI_ISL_2367431 B.1 2020-03-16 Europe Switzerland Vaud 2021-01-22 59 0.91