Skip to content

Commit

Permalink
fix: no longer have regression when no bitmap flipped is most efficient
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Aug 3, 2023
1 parent 34830ab commit b653753
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 94 deletions.
24 changes: 12 additions & 12 deletions endToEndTests/test/info.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ describe('The /info endpoint', () => {
.get('/info')
.expect(200)
.expect('Content-Type', 'application/json')
.expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 60074145 })
.expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 60055044 })
.end(done);
});

Expand All @@ -26,15 +26,15 @@ describe('The /info endpoint', () => {
'bitmapContainerSizeStatistic'
);
expect(returnedInfo.bitmapContainerSizePerGenomeSection.bitmapContainerSizeStatistic).to.deep.equal({
numberOfArrayContainers: 47970,
numberOfBitsetContainers: 209,
numberOfRunContainers: 209,
numberOfValuesStoredInArrayContainers: 64283,
numberOfArrayContainers: 43545,
numberOfBitsetContainers: 0,
numberOfRunContainers: 78,
numberOfValuesStoredInArrayContainers: 59694,
numberOfValuesStoredInBitsetContainers: 0,
numberOfValuesStoredInRunContainers: 2410,
totalBitmapSizeArrayContainers: 128566,
numberOfValuesStoredInRunContainers: 2237,
totalBitmapSizeArrayContainers: 119388,
totalBitmapSizeBitsetContainers: 0,
totalBitmapSizeRunContainers: 3538,
totalBitmapSizeRunContainers: 2964,
});

expect(returnedInfo.bitmapContainerSizePerGenomeSection).to.have.property(
Expand Down Expand Up @@ -62,18 +62,18 @@ describe('The /info endpoint', () => {
expect(returnedInfo).to.have.property('bitmapSizePerSymbol');
expect(returnedInfo.bitmapSizePerSymbol).to.deep.equal({
'-': 6003470,
'A': 6127203,
'A': 6112681,
'B': 5980600,
'C': 6073069,
'C': 6064603,
'D': 5980600,
'G': 6075909,
'G': 6067693,
'H': 5980600,
'K': 5980630,
'M': 5980620,
'N': 5980600,
'R': 5980620,
'S': 5980600,
'T': 6139332,
'T': 6125253,
'V': 5980600,
'W': 5980600,
'Y': 5980620,
Expand Down
12 changes: 6 additions & 6 deletions src/silo/database.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
const auto simple_info = database.getDatabaseInfo();

EXPECT_EQ(
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 6127203
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 6112681
);
EXPECT_EQ(
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::GAP), 6003470
Expand All @@ -54,7 +54,7 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
.number_of_values_stored_in_run_containers,
2410
2237
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
Expand All @@ -63,18 +63,18 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
);

EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 96205673
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 96160390
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 48217381
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 48185111
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
.total_bitmap_size_array_containers,
128566
119388
);

EXPECT_EQ(simple_info.total_size, 60074145);
EXPECT_EQ(simple_info.total_size, 60055044);
EXPECT_EQ(simple_info.sequence_count, 100);
EXPECT_EQ(simple_info.n_bitmaps_size, 3898);
}
Expand Down
51 changes: 10 additions & 41 deletions src/silo/storage/aa_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,26 @@ void silo::AAPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
for (const auto& symbol : AA_SYMBOLS) {
roaring::Roaring bitmap = bitmaps.at(symbol);
bitmap.runOptimize();
bitmap.shrinkToFit();
const uint32_t count = flipped_bitmap_before == symbol ? sequence_count - bitmap.cardinality()
: bitmap.cardinality();
if (count > max_count) {
max_symbol = symbol;
max_count = count;
}
}
if (max_symbol.has_value() && max_symbol != flipped_bitmap_before) {
if (max_symbol != flipped_bitmap_before) {
if (flipped_bitmap_before.has_value()) {
bitmaps[*flipped_bitmap_before].flip(0, sequence_count);
bitmaps[*flipped_bitmap_before].runOptimize();
bitmaps[*flipped_bitmap_before].shrinkToFit();
}
if (max_symbol.has_value()) {
bitmaps[*max_symbol].flip(0, sequence_count);
bitmaps[*max_symbol].runOptimize();
bitmaps[*max_symbol].shrinkToFit();
}
symbol_whose_bitmap_is_flipped = max_symbol;
bitmaps[*max_symbol].flip(0, sequence_count);
bitmaps[*max_symbol].runOptimize();
}
}

Expand Down Expand Up @@ -148,6 +154,7 @@ void silo::AAStorePartition::fillXBitmaps(const std::vector<std::string>& sequen
positions_with_aa_symbol_x.size(), positions_with_aa_symbol_x.data()
);
aa_symbol_x_bitmaps[sequence_count + sequence_id].runOptimize();
aa_symbol_x_bitmaps[sequence_count + sequence_id].shrinkToFit();
positions_with_aa_symbol_x.clear();
}
}
Expand All @@ -160,44 +167,6 @@ void silo::AAStorePartition::interpret(const std::vector<std::string>& sequences
sequence_count += sequences.size();
}

size_t silo::AAStorePartition::computeSize() const {
size_t result = 0;
for (const auto& position : positions) {
for (const AA_SYMBOL symbol : AA_SYMBOLS) {
result += position.bitmaps.at(symbol).getSizeInBytes(false);
}
}
return result;
}

/// Calls `runOptimize()` on every amino-acid symbol bitmap of every position,
/// processing ranges of positions in parallel.
///
/// @return The number of bitmaps whose `runOptimize()` call returned true.
size_t silo::AAStorePartition::runOptimize() {
   std::atomic<size_t> count_true = 0;
   tbb::parallel_for(tbb::blocked_range<size_t>(0U, positions.size()), [&](const auto& local) {
      // Count per chunk and publish once, mirroring shrinkToFit(): this avoids one
      // contended atomic read-modify-write per bitmap across the worker threads.
      size_t local_count = 0;
      for (auto position = local.begin(); position != local.end(); ++position) {
         for (const AA_SYMBOL symbol : AA_SYMBOLS) {
            if (positions[position].bitmaps[symbol].runOptimize()) {
               ++local_count;
            }
         }
      }
      count_true += local_count;
   });
   return count_true;
}

/// Calls `shrinkToFit()` on every amino-acid symbol bitmap of every position,
/// processing ranges of positions in parallel.
///
/// @return The sum of the values returned by the individual `shrinkToFit()` calls.
size_t silo::AAStorePartition::shrinkToFit() {
   std::atomic<size_t> total_saved = 0;
   const auto shrink_chunk = [&](const auto& chunk) {
      // Accumulate per chunk so the shared atomic is touched once per chunk only.
      size_t chunk_saved = 0;
      for (auto index = chunk.begin(); index != chunk.end(); ++index) {
         auto& position_bitmaps = positions[index].bitmaps;
         for (const AA_SYMBOL aa_symbol : AA_SYMBOLS) {
            chunk_saved += position_bitmaps[aa_symbol].shrinkToFit();
         }
      }
      total_saved += chunk_saved;
   };
   tbb::parallel_for(tbb::blocked_range<size_t>(0U, positions.size()), shrink_chunk);
   return total_saved;
}

/// Constructs the store from a reference sequence.
///
/// The sequence is taken by value and moved into the `reference_sequence`
/// member, so callers can pass an rvalue to avoid a copy.
///
/// @param reference_sequence The reference sequence as a vector of AA_SYMBOL values.
silo::AAStore::AAStore(std::vector<AA_SYMBOL> reference_sequence)
: reference_sequence(std::move(reference_sequence)) {}

Expand Down
44 changes: 10 additions & 34 deletions src/silo/storage/sequence_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,28 @@ void silo::NucPosition::flipMostNumerousBitmap(uint32_t sequence_count) {
uint32_t max_count = 0;

for (const auto& symbol : NUC_SYMBOLS) {
roaring::Roaring bitmap = bitmaps.at(symbol);
roaring::Roaring& bitmap = bitmaps[symbol];
bitmap.runOptimize();
bitmap.shrinkToFit();
const uint32_t count = flipped_bitmap_before == symbol ? sequence_count - bitmap.cardinality()
: bitmap.cardinality();
if (count > max_count) {
max_symbol = symbol;
max_count = count;
}
}
if (max_symbol.has_value() && max_symbol != flipped_bitmap_before) {
if (max_symbol != flipped_bitmap_before) {
if (flipped_bitmap_before.has_value()) {
bitmaps[*flipped_bitmap_before].flip(0, sequence_count);
bitmaps[*flipped_bitmap_before].runOptimize();
bitmaps[*flipped_bitmap_before].shrinkToFit();
}
if (max_symbol.has_value()) {
bitmaps[*max_symbol].flip(0, sequence_count);
bitmaps[*max_symbol].runOptimize();
bitmaps[*max_symbol].shrinkToFit();
}
symbol_whose_bitmap_is_flipped = max_symbol;
bitmaps[*max_symbol].flip(0, sequence_count);
bitmaps[*max_symbol].runOptimize();
}
}

Expand Down Expand Up @@ -199,36 +205,6 @@ size_t silo::SequenceStorePartition::computeSize() const {
return result;
}

/// Calls `runOptimize()` on every nucleotide symbol bitmap of every position,
/// processing ranges of positions in parallel.
///
/// @return The number of bitmaps whose `runOptimize()` call returned true.
size_t silo::SequenceStorePartition::runOptimize() {
   std::atomic<size_t> count_true = 0;
   const tbb::blocked_range<size_t> range(0U, positions.size());
   tbb::parallel_for(range, [&](const decltype(range)& local) {
      // Count per chunk and publish once, mirroring shrinkToFit(): this avoids one
      // contended atomic read-modify-write per bitmap across the worker threads.
      size_t local_count = 0;
      for (auto position = local.begin(); position != local.end(); ++position) {
         for (const NUCLEOTIDE_SYMBOL symbol : NUC_SYMBOLS) {
            if (positions[position].bitmaps[symbol].runOptimize()) {
               ++local_count;
            }
         }
      }
      count_true += local_count;
   });
   return count_true;
}

size_t silo::SequenceStorePartition::shrinkToFit() {
std::atomic<size_t> saved = 0;
const tbb::blocked_range<size_t> range(0U, positions.size());
tbb::parallel_for(range, [&](const decltype(range) local) {
size_t local_saved = 0;
for (auto position = local.begin(); position != local.end(); ++position) {
for (const NUCLEOTIDE_SYMBOL symbol : NUC_SYMBOLS) {
local_saved += positions[position].bitmaps[symbol].shrinkToFit();
}
}
saved += local_saved;
});
return saved;
}

/// Constructs the store from a reference genome.
///
/// The genome is taken by value and moved into the `reference_genome`
/// member, so callers can pass an rvalue to avoid a copy.
///
/// @param reference_genome The reference genome as a vector of NUCLEOTIDE_SYMBOL values.
silo::SequenceStore::SequenceStore(std::vector<NUCLEOTIDE_SYMBOL> reference_genome)
: reference_genome(std::move(reference_genome)) {}

Expand Down
2 changes: 1 addition & 1 deletion src/silo_api/info_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void to_json(nlohmann::json& json, const BitmapContainerSizeStatistic& statistic
json = nlohmann::json{
{"numberOfArrayContainers", statistics.number_of_array_containers},
{"numberOfRunContainers", statistics.number_of_run_containers},
{"numberOfBitsetContainers", statistics.number_of_run_containers},
{"numberOfBitsetContainers", statistics.number_of_bitset_containers},
{"numberOfValuesStoredInArrayContainers",
statistics.number_of_values_stored_in_array_containers},
{"numberOfValuesStoredInRunContainers", statistics.number_of_values_stored_in_run_containers},
Expand Down

0 comments on commit b653753

Please sign in to comment.