diff --git a/endToEndTests/test/info.test.js b/endToEndTests/test/info.test.js index 3ea425608..0a5a4310c 100644 --- a/endToEndTests/test/info.test.js +++ b/endToEndTests/test/info.test.js @@ -7,7 +7,7 @@ describe('The /info endpoint', () => { .get('/info') .expect(200) .expect('Content-Type', 'application/json') - .expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 66467326 }) + .expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 68915226 }) .end(done); }); @@ -26,7 +26,7 @@ describe('The /info endpoint', () => { 'bitmapContainerSizeStatistic' ); expect(returnedInfo.bitmapContainerSizePerGenomeSection.bitmapContainerSizeStatistic).to.deep.equal({ - numberOfArrayContainers: 716007, + numberOfArrayContainers: 745081, numberOfBitsetContainers: 0, numberOfRunContainers: 0, numberOfValuesStoredInArrayContainers: 2929577, @@ -61,22 +61,22 @@ describe('The /info endpoint', () => { expect(returnedInfo).to.have.property('bitmapSizePerSymbol'); expect(returnedInfo.bitmapSizePerSymbol).to.deep.equal({ - '-': 5779958, - 'A': 9190510, - 'B': 5741376, - 'C': 7859992, - 'D': 5741376, - 'G': 8006876, - 'H': 5741376, - 'K': 5741498, - 'M': 5741466, - 'N': 5741376, - 'R': 5741426, - 'S': 5741376, - 'T': 9456412, - 'V': 5741376, - 'W': 5741426, - 'Y': 5741406, + '-': 6019718, + 'A': 9498982, + 'B': 5980600, + 'C': 8141784, + 'D': 5980600, + 'G': 8291636, + 'H': 5980600, + 'K': 5980730, + 'M': 5980690, + 'N': 5980600, + 'R': 5980650, + 'S': 5980600, + 'T': 9770332, + 'V': 5980600, + 'W': 5980650, + 'Y': 5980630, }); }) .end(done); diff --git a/endToEndTests/test/queries/GroupByLineage.json b/endToEndTests/test/queries/GroupByLineage.json index 640957ef0..5e46ec9e2 100644 --- a/endToEndTests/test/queries/GroupByLineage.json +++ b/endToEndTests/test/queries/GroupByLineage.json @@ -3,12 +3,8 @@ "query": { "action": { "type": "Aggregated", - "groupByFields": [ - "pango_lineage" - ], - "orderByFields": [ - "pango_lineage" - ] + "groupByFields": ["pango_lineage"], + "orderByFields": ["pango_lineage"] }, "filterExpression": { "type": "True" diff --git a/endToEndTests/test/queries/GroupByLineageOrderByCountLimit.json b/endToEndTests/test/queries/GroupByLineageOrderByCountLimit.json index e1035d8d5..8099acb3e 100644 --- a/endToEndTests/test/queries/GroupByLineageOrderByCountLimit.json +++ b/endToEndTests/test/queries/GroupByLineageOrderByCountLimit.json @@ -3,9 +3,7 @@ "query": { "action": { "type": "Aggregated", - "groupByFields": [ - "pango_lineage" - ], + "groupByFields": ["pango_lineage"], "orderByFields": [ { "field": "count", diff --git a/endToEndTests/test/queries/OffsetLimitOverlap.json b/endToEndTests/test/queries/OffsetLimitOverlap.json index 95899046e..643f5758d 100644 --- a/endToEndTests/test/queries/OffsetLimitOverlap.json +++ b/endToEndTests/test/queries/OffsetLimitOverlap.json @@ -3,9 +3,7 @@ "query": { "action": { "type": "Details", - "orderByFields": [ - "gisaid_epi_isl" - ], + "orderByFields": ["gisaid_epi_isl"], "offset": 90, "limit": 90 }, diff --git a/include/silo/storage/column_group.h b/include/silo/storage/column_group.h index c124bb884..bacc99eeb 100644 --- a/include/silo/storage/column_group.h +++ b/include/silo/storage/column_group.h @@ -41,7 +41,9 @@ class DatabaseConfig; namespace silo::storage { -struct ColumnPartitionGroup { +class ColumnPartitionGroup { + friend class boost::serialization::access; + template [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off @@ -66,6 +68,7 @@ struct ColumnPartitionGroup { // clang-format on } + public: std::vector metadata; std::map string_columns; @@ -85,7 +88,9 @@ struct ColumnPartitionGroup { ) const; }; -struct ColumnGroup { +class ColumnGroup { + friend class boost::serialization::access; + template [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off @@ -110,6 +115,7 @@ struct ColumnGroup { // clang-format on } + public: std::vector metadata; std::map string_columns; diff --git a/src/silo/config/database_config.test.cpp b/src/silo/config/database_config.test.cpp index 7b057d8c4..34f33bc2f 100644 --- a/src/silo/config/database_config.test.cpp +++ b/src/silo/config/database_config.test.cpp @@ -118,13 +118,14 @@ TEST(DatabaseConfigReader, shouldReadConfigWithCorrectParameters) { TEST(DatabaseConfigReader, shouldThrowExceptionWhenConfigFileDoesNotExist) { ASSERT_THROW( - DatabaseConfigReader().readConfig("testBaseData/does_not_exist.yaml"), std::runtime_error + (void)DatabaseConfigReader().readConfig("testBaseData/does_not_exist.yaml"), + std::runtime_error ); } TEST(DatabaseConfigReader, shouldThrowErrorForInvalidMetadataType) { ASSERT_THROW( - DatabaseConfigReader().readConfig( + (void)DatabaseConfigReader().readConfig( "testBaseData/test_database_config_with_invalid_metadata_type.yaml" ), ConfigException @@ -132,14 +133,14 @@ TEST(DatabaseConfigReader, shouldThrowErrorForInvalidMetadataType) { } TEST(DatabaseConfigReader, shouldNotThrowIfThereAreAdditionalEntries) { - ASSERT_NO_THROW(DatabaseConfigReader().readConfig( + ASSERT_NO_THROW((void)DatabaseConfigReader().readConfig( "testBaseData/test_database_config_with_additional_entries.yaml" )); } TEST(DatabaseConfigReader, shouldThrowIfTheConfigHasAnInvalidStructure) { ASSERT_THROW( - DatabaseConfigReader().readConfig( + (void)DatabaseConfigReader().readConfig( "testBaseData/test_database_config_with_invalid_structure.yaml" ), YAML::InvalidNode diff --git a/src/silo/database.test.cpp b/src/silo/database.test.cpp index 71f804fec..95548757e 100644 --- a/src/silo/database.test.cpp +++ b/src/silo/database.test.cpp @@ -40,10 +40,10 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) { const auto simple_info = database.getDatabaseInfo(); EXPECT_EQ( - detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 9190510 + detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 9498982 ); EXPECT_EQ( - detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::GAP), 5779958 + detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::GAP), 6019718 ); EXPECT_EQ( @@ -63,10 +63,10 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) { ); EXPECT_EQ( - detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 103449226 + detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 107509402 ); EXPECT_EQ( - detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 55370197 + detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 57429359 ); EXPECT_EQ( detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic @@ -74,7 +74,7 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) { 5859154 ); - EXPECT_EQ(simple_info.total_size, 66467326); + EXPECT_EQ(simple_info.total_size, 68915226); EXPECT_EQ(simple_info.sequence_count, 100); EXPECT_EQ(simple_info.n_bitmaps_size, 3898); } diff --git a/src/silo/preprocessing/pango_lineage_count.test.cpp b/src/silo/preprocessing/pango_lineage_count.test.cpp index 7408d4cf1..2388e5f06 100644 --- a/src/silo/preprocessing/pango_lineage_count.test.cpp +++ b/src/silo/preprocessing/pango_lineage_count.test.cpp @@ -17,13 +17,13 @@ TEST(PangoLineageCounts, buildPangoLineageCounts) { database_config ); - ASSERT_EQ(result.pango_lineage_counts.size(), 24); + ASSERT_EQ(result.pango_lineage_counts.size(), 25); ASSERT_EQ(result.pango_lineage_counts[0].pango_lineage.value, ""); ASSERT_EQ(result.pango_lineage_counts[0].count_of_sequences, 1); ASSERT_EQ(result.pango_lineage_counts[1].pango_lineage.value, "B.1"); ASSERT_EQ(result.pango_lineage_counts[1].count_of_sequences, 3); ASSERT_EQ(result.pango_lineage_counts[7].pango_lineage.value, "B.1.1.7"); ASSERT_EQ(result.pango_lineage_counts[7].count_of_sequences, 48); - ASSERT_EQ(result.pango_lineage_counts[23].pango_lineage.value, "B.1.617.2.9.2"); + ASSERT_EQ(result.pango_lineage_counts[23].pango_lineage.value, "XA.1"); ASSERT_EQ(result.pango_lineage_counts[23].count_of_sequences, 1); } diff --git a/src/silo/preprocessing/partition.cpp b/src/silo/preprocessing/partition.cpp index 64bb13002..d781ad946 100644 --- a/src/silo/preprocessing/partition.cpp +++ b/src/silo/preprocessing/partition.cpp @@ -259,3 +259,64 @@ std::size_t std::hash::operator()( (hash()(partition_chunk.chunk) << 3) + (hash()(partition_chunk.chunk) >> 2); } + +namespace silo::common { + +// NOLINTNEXTLINE(readability-identifier-naming) +void from_json( + const nlohmann::json& js_object, + silo::common::UnaliasedPangoLineage& pango_lineage +) { + pango_lineage.value = js_object; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +void to_json(nlohmann::json& js_object, const silo::common::UnaliasedPangoLineage& pango_lineage) { + js_object = pango_lineage.value; +} +} // namespace silo::common + +template <> +struct nlohmann::adl_serializer { + // NOLINTNEXTLINE(readability-identifier-naming) + static silo::preprocessing::Chunk from_json(const nlohmann::json& js_object) { + return silo::preprocessing::Chunk{ + js_object["lineages"].template get>(), + js_object["count"].template get()}; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + static void to_json(nlohmann::json& js_object, const silo::preprocessing::Chunk& chunk) { + js_object["lineages"] = chunk.getPangoLineages(); + js_object["count"] = chunk.getCountOfSequences(); + } +}; + +template <> +struct nlohmann::adl_serializer { + // NOLINTNEXTLINE(readability-identifier-naming) + static silo::preprocessing::Partition from_json(const nlohmann::json& js_object) { + return silo::preprocessing::Partition{ + js_object.template get>()}; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + static void to_json(nlohmann::json& js_object, const silo::preprocessing::Partition& partition) { + js_object = partition.getChunks(); + } +}; + +namespace silo::preprocessing { + +void Partitions::save(std::ostream& output_file) const { + const nlohmann::json json(partitions); + output_file << json.dump(4); +} + +Partitions Partitions::load(std::istream& input_file) { + nlohmann::json json; + json = nlohmann::json::parse(input_file); + const std::vector partitions = json.get>(); + return Partitions{partitions}; +} +} // namespace silo::preprocessing diff --git a/src/silo/preprocessing/partition.test.cpp b/src/silo/preprocessing/partition.test.cpp index f3c3e8652..e37917d40 100644 --- a/src/silo/preprocessing/partition.test.cpp +++ b/src/silo/preprocessing/partition.test.cpp @@ -1,5 +1,6 @@ #include "silo/preprocessing/partition.h" +#include "silo/common/pango_lineage.h" #include "silo/preprocessing/preprocessing_config.h" #include @@ -11,12 +12,24 @@ using silo::preprocessing::Partitions; Partitions createSimplePartitionsObject() { std::vector chunks1; - chunks1.emplace_back(std::vector{{"A.1", "A.3", "B.1", "A.2"}}, 8); - chunks1.emplace_back(std::vector{{"B.2", "C.3", "C.1", "C.2"}}, 11123); + chunks1.emplace_back( + std::vector{{{"A.1"}, {"A.3"}, {"B.1"}, {"A.2"}}}, 8 + ); + chunks1.emplace_back( + std::vector{{{"B.2"}, {"C.3"}, {"C.1"}, {"C.2"}}}, 11123 + ); Partition partition1(std::move(chunks1)); std::vector chunks2; - chunks2.emplace_back(std::vector{{"XY.1", "XY.3", "XY.A.A.A.3", "XY.2312"}}, 123); - chunks2.emplace_back(std::vector{{"XT.1", "XT.3", "XTA.A.3", "XT.2312"}}, 512); + chunks2.emplace_back( + std::vector{ + {{"XY.1"}, {"XY.3"}, {"XY.A.A.A.3"}, {"XY.2312"}}}, + 123 + ); + chunks2.emplace_back( + std::vector{ + {{"XT.1"}, {"XT.3"}, {"XTA.A.3"}, {"XT.2312"}}}, + 512 + ); Partition partition2(std::move(chunks2)); return Partitions{{std::move(partition1), std::move(partition2)}}; } @@ -81,7 +94,7 @@ TEST(Partitions, shouldSaveSimpleConfig) { ASSERT_EQ(file_contents, expected_file_contents); } -TEST(Partitions, shouldSaveAndLoadSimpleConfig) { +TEST(Partitions, shouldSaveAndLoadSimplePartitionsObject) { const Partitions partitions = createSimplePartitionsObject(); std::ofstream out_file("output/test.partitions"); partitions.save(out_file); diff --git a/src/silo/storage/database_partition.cpp b/src/silo/storage/database_partition.cpp index e136fa07e..a6b3decf9 100644 --- a/src/silo/storage/database_partition.cpp +++ b/src/silo/storage/database_partition.cpp @@ -16,7 +16,7 @@ class StringColumnPartition; } // namespace storage::column DatabasePartition::DatabasePartition(std::vector chunks) - : chunks(chunks) {} + : chunks(std::move(chunks)) {} const std::vector& DatabasePartition::getChunks() const { return chunks;