From b3aa67556539b53de403a389a10950fc6558af20 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 18 Mar 2023 10:21:21 +0100 Subject: [PATCH] Fix block size bug (#915) The block sizes for the OPS, OSP, SOP, and SPO permutations were too small because of a bug in how it was determined when to end a block. For example, for the current Wikidata index as of this writing, the PSO and POS permutations have 48,769 blocks each, but the SPO and SOP permutations have only 1967 blocks each. This bug is fixed now. Co-authored-by: Johannes Kalmbach --- src/index/CompressedRelation.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index dab726bbc7..4dee23172f 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -332,18 +332,27 @@ void CompressedRelationWriter::addRelation(Id col0Id, // explicitly below. CompressedRelationMetadata metaData{col0Id, col1And2Ids.numRows(), multC1, multC2}; - auto sizeOfRelation = - col1And2Ids.numRows() * col1And2Ids.numColumns() * sizeof(Id); + + // Determine the number of bytes the IDs stored in an IdTable consume. + // The return type is double because we use the result to compare it with + // other doubles below. + auto sizeInBytes = [](const auto& table) { + return static_cast(table.numRows() * table.numColumns() * + sizeof(Id)); + }; // If this is a large relation, or the currrently buffered relations + // this relation are too large, we will write the buffered relations to file // and start a new block. 
- if (sizeOfRelation > _numBytesPerBlock * 8 / 10 || - sizeOfRelation + _buffer.numRows() > 1.5 * _numBytesPerBlock) { + bool relationHasExclusiveBlocks = + sizeInBytes(col1And2Ids) > 0.8 * static_cast(_numBytesPerBlock); + if (relationHasExclusiveBlocks || + sizeInBytes(col1And2Ids) + sizeInBytes(_buffer) > + static_cast(_numBytesPerBlock) * 1.5) { writeBufferedRelationsToSingleBlock(); } - if (sizeOfRelation > _numBytesPerBlock * 8 / 10) { + if (relationHasExclusiveBlocks) { // The relation is large, immediately write the relation to a set of // exclusive blocks. writeRelationToExclusiveBlocks(col0Id, col1And2Ids);