Skip to content

Commit

Permalink
fix: bug where sequence reconstruction is false when the flipped bitmap is different from the reference sequence symbol
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Aug 10, 2023
1 parent bca4961 commit edac58c
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 18 deletions.
15 changes: 9 additions & 6 deletions endToEndTests/test/queries/fastaAligned_multiple.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"query": {
"action": {
"type": "FastaAligned",
"sequenceName": ["testSecondSequence", "S"]
"sequenceName": [
"testSecondSequence",
"S"
]
},
"filterExpression": {
"type": "IntBetween",
Expand All @@ -14,24 +17,24 @@
},
"expectedQueryResult": [
{
"S": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAI--SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGV-YHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTYGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIDDTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPINFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILARLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTHNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"S": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAI--SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGV-YHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTYGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIDDTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSHRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPINFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILARLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTHNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"gisaid_epi_isl": "EPI_ISL_1408408",
"testSecondSequence": "ACGT"
},
{
"S": "MFVFLVLLPLVSSQCVNLITRTQ---SYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLDVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLGRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKXXXXXXXXXXXXXXNKLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGNTPCNGVAGFNCYFPLRSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEYVNNSYECDIPIGAGICASYQTQTKSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLKRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKYFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNHNAQALNTLVKQLSSKFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"S": "MFVFLVLLPLVSSQCVNLITRTQ---SYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLDVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLGRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKXXXXXXXXXXXXXXNKLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEYVNNSYECDIPIGAGICASYQTQTKSHRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLKRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKYFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNHNAQALNTLVKQLSSKFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"gisaid_epi_isl": "EPI_ISL_1749899",
"testSecondSequence": "AAGN"
},
{


"gisaid_epi_isl": "EPI_ISL_1749892",
"testSecondSequence": "ACGT"
},
{
"S": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAI--SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGV-YHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTYGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIDDTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPINFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILARLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTHNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"S": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAI--SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGV-YHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTYGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIDDTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSHRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPINFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILARLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTHNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*",
"gisaid_epi_isl": "EPI_ISL_2016901",
"testSecondSequence": "ACGT"
}
]
}
}
4 changes: 3 additions & 1 deletion include/silo/storage/aa_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class AAPosition {
AASymbolMap<roaring::Roaring> bitmaps;
std::optional<AA_SYMBOL> symbol_whose_bitmap_is_flipped = std::nullopt;

void flipMostNumerousBitmap(uint32_t sequence_count);
std::optional<silo::AA_SYMBOL> flipMostNumerousBitmap(uint32_t sequence_count);
};

class AAStorePartition {
Expand All @@ -58,6 +58,7 @@ class AAStorePartition {
void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) {
// clang-format off
archive & sequence_count;
archive & indexing_differences_to_reference_sequence;
archive & positions;
archive & aa_symbol_x_bitmaps;
// clang-format on
Expand All @@ -71,6 +72,7 @@ class AAStorePartition {
explicit AAStorePartition(const std::vector<AA_SYMBOL>& reference_sequence);

const std::vector<AA_SYMBOL>& reference_sequence;
std::vector<std::pair<size_t, AA_SYMBOL>> indexing_differences_to_reference_sequence;
std::vector<AAPosition> positions;
std::vector<roaring::Roaring> aa_symbol_x_bitmaps;
uint32_t sequence_count = 0;
Expand Down
4 changes: 3 additions & 1 deletion include/silo/storage/sequence_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class NucPosition {
NucleotideSymbolMap<roaring::Roaring> bitmaps;
std::optional<NUCLEOTIDE_SYMBOL> symbol_whose_bitmap_is_flipped = std::nullopt;

void flipMostNumerousBitmap(uint32_t sequence_count);
std::optional<silo::NUCLEOTIDE_SYMBOL> flipMostNumerousBitmap(uint32_t sequence_count);
};

struct SequenceStoreInfo {
Expand All @@ -64,6 +64,7 @@ class SequenceStorePartition {
void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) {
// clang-format off
archive & positions;
archive & indexing_differences_to_reference_genome;
archive & nucleotide_symbol_n_bitmaps;
archive & sequence_count;
// clang-format on
Expand All @@ -77,6 +78,7 @@ class SequenceStorePartition {
explicit SequenceStorePartition(const std::vector<NUCLEOTIDE_SYMBOL>& reference_genome);

const std::vector<NUCLEOTIDE_SYMBOL>& reference_genome;
std::vector<std::pair<size_t, NUCLEOTIDE_SYMBOL>> indexing_differences_to_reference_genome;
std::vector<NucPosition> positions;
std::vector<roaring::Roaring> nucleotide_symbol_n_bitmaps;
uint32_t sequence_count = 0;
Expand Down
7 changes: 3 additions & 4 deletions src/silo/query_engine/actions/aa_mutations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,9 @@ void from_json(const nlohmann::json& json, std::unique_ptr<AAMutations>& action)
for (const auto& child : json["sequenceName"]) {
CHECK_SILO_QUERY(
child.is_string(),
"AminoAcidMutations action can have the field sequenceName of type string or an array "
"of "
"strings, but no other type; while parsing array encountered the element " +
child.dump() + " which is not of type string"
"The field sequenceName of AminoAcidMutations action must have type string or an "
"array, if present. Found:" +
child.dump()
)
sequence_names.emplace_back(child.get<std::string>());
}
Expand Down
9 changes: 9 additions & 0 deletions src/silo/query_engine/actions/fasta_aligned.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ std::string reconstructNucSequence(
silo::nucleotideSymbolToChar
);

for (const auto& [position_id, symbol] :
sequence_store.indexing_differences_to_reference_genome) {
reconstructed_sequence[position_id] = nucleotideSymbolToChar(symbol);
}

tbb::
parallel_for(
tbb::blocked_range<size_t>(0, sequence_store.positions.size()),
Expand Down Expand Up @@ -74,6 +79,10 @@ std::string reconstructAASequence(const AAStorePartition& aa_store, uint32_t seq
silo::aaSymbolToChar
);

for (const auto& [position_id, symbol] : aa_store.indexing_differences_to_reference_sequence) {
reconstructed_sequence[position_id] = aaSymbolToChar(symbol);
}

tbb::
parallel_for(
tbb::blocked_range<size_t>(0, aa_store.positions.size()),
Expand Down
4 changes: 3 additions & 1 deletion src/silo/storage/aa_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ silo::AAPosition::AAPosition(std::optional<AA_SYMBOL> symbol) {
symbol_whose_bitmap_is_flipped = symbol;
}

void silo::AAPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
std::optional<silo::AA_SYMBOL> silo::AAPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
std::optional<AA_SYMBOL> previous_flipped_bitmap_symbol = symbol_whose_bitmap_is_flipped;
std::optional<AA_SYMBOL> new_flipped_bitmap_symbol = std::nullopt;
uint32_t max_count = 0;
Expand Down Expand Up @@ -50,7 +50,9 @@ void silo::AAPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
bitmaps[*new_flipped_bitmap_symbol].shrinkToFit();
}
symbol_whose_bitmap_is_flipped = new_flipped_bitmap_symbol;
return symbol_whose_bitmap_is_flipped;
}
return std::nullopt;
}

silo::AAStorePartition::AAStorePartition(const std::vector<AA_SYMBOL>& reference_sequence)
Expand Down
33 changes: 29 additions & 4 deletions src/silo/storage/database_partition.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "silo/storage/database_partition.h"

#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>

#include "silo/storage/column_group.h"
Expand All @@ -22,20 +23,44 @@ DatabasePartition::DatabasePartition(std::vector<silo::preprocessing::Chunk> chu

// Calls flipMostNumerousBitmap(sequence_count) on every position of every
// nucleotide and amino-acid sequence store of this partition, in parallel.
// Whenever that call returns a symbol, the (position, symbol) pair is recorded
// in the store's indexing_differences_to_reference_* vector.
//
// NOTE(review): this listing is a diff whose +/- markers were lost. The lines
// flagged below as "pre-change" appear to be the removed side of the diff
// (the file header reports 4 deletions); confirm against the repository.
void DatabasePartition::flipBitmaps() {
for (auto& [_, seq_store] : nuc_sequences) {
// One result vector per worker thread, so the parallel loop body can append
// without sharing a container between threads.
tbb::enumerable_thread_specific<decltype(seq_store.indexing_differences_to_reference_genome)>
flipped_bitmaps;

auto& positions = seq_store.positions;
tbb::parallel_for(tbb::blocked_range<uint32_t>(0, positions.size()), [&](const auto& local) {
// The calling thread's own vector.
auto& local_flipped_bitmaps = flipped_bitmaps.local();
for (auto position = local.begin(); position != local.end(); ++position) {
// NOTE(review): pre-change line (return value ignored); presumably removed.
positions[position].flipMostNumerousBitmap(sequence_count);
// An engaged optional is recorded together with its position index.
// Presumably it is only engaged when the flipped symbol changed - TODO confirm
// against flipMostNumerousBitmap.
auto flipped_symbol = positions[position].flipMostNumerousBitmap(sequence_count);
if (flipped_symbol.has_value()) {
local_flipped_bitmaps.emplace_back(position, *flipped_symbol);
}
}
});
// Sequential merge after the parallel loop: append every per-thread entry to
// the store's vector.
for (const auto& local : flipped_bitmaps) {
for (auto& element : local) {
seq_store.indexing_differences_to_reference_genome.emplace_back(element);
}
}
}
// NOTE(review): the next two lines are the pre-change loop header and binding
// (named seq_store); presumably replaced by the aa_store loop that follows.
for (auto& [_, seq_store] : aa_sequences) {
auto& positions = seq_store.positions;
// Same procedure for the amino-acid stores.
for (auto& [_, aa_store] : aa_sequences) {
tbb::enumerable_thread_specific<decltype(aa_store.indexing_differences_to_reference_sequence)>
flipped_bitmaps;

auto& positions = aa_store.positions;
tbb::parallel_for(tbb::blocked_range<uint32_t>(0, positions.size()), [&](const auto& local) {
auto& local_flipped_bitmaps = flipped_bitmaps.local();
for (auto position = local.begin(); position != local.end(); ++position) {
// NOTE(review): pre-change line (return value ignored); presumably removed.
positions[position].flipMostNumerousBitmap(sequence_count);
auto flipped_symbol = positions[position].flipMostNumerousBitmap(sequence_count);
if (flipped_symbol.has_value()) {
local_flipped_bitmaps.emplace_back(position, *flipped_symbol);
}
}
});
// Sequential merge of the per-thread results into the store.
for (const auto& local : flipped_bitmaps) {
for (auto& element : local) {
aa_store.indexing_differences_to_reference_sequence.emplace_back(element);
}
}
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/silo/storage/sequence_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ silo::NucPosition::NucPosition(std::optional<NUCLEOTIDE_SYMBOL> symbol) {
symbol_whose_bitmap_is_flipped = symbol;
}

void silo::NucPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
std::optional<silo::NUCLEOTIDE_SYMBOL> silo::NucPosition::flipMostNumerousBitmap(
uint32_t sequence_count
) {
std::optional<NUCLEOTIDE_SYMBOL> flipped_bitmap_before = symbol_whose_bitmap_is_flipped;
std::optional<NUCLEOTIDE_SYMBOL> max_symbol = std::nullopt;
uint32_t max_count = 0;
Expand Down Expand Up @@ -51,7 +53,9 @@ void silo::NucPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
bitmaps[*max_symbol].shrinkToFit();
}
symbol_whose_bitmap_is_flipped = max_symbol;
return symbol_whose_bitmap_is_flipped;
}
return std::nullopt;
}

silo::SequenceStorePartition::SequenceStorePartition(
Expand Down

0 comments on commit edac58c

Please sign in to comment.