diff --git a/src/engine/AddCombinedRowToTable.h b/src/engine/AddCombinedRowToTable.h index c43e298a88..45f51fb527 100644 --- a/src/engine/AddCombinedRowToTable.h +++ b/src/engine/AddCombinedRowToTable.h @@ -155,8 +155,13 @@ class AddCombinedRowToIdTable { if (nextIndex_ != 0) { AD_CORRECTNESS_CHECK(inputLeftAndRight_.has_value()); flush(); - } else { - // Clear vocab when no rows were written. + } else if (resultTable_.empty()) { + // Clear local vocab when no rows were written. + // + // TODO This is a conservative approach. We could + // optimize this case (clear the local vocab more often, but still + // correctly) by considering the situation after all the relevant inputs + // have been processed. mergedVocab_ = LocalVocab{}; } } diff --git a/test/AddCombinedRowToTableTest.cpp b/test/AddCombinedRowToTableTest.cpp index 2c409a830b..4bf72adcf9 100644 --- a/test/AddCombinedRowToTableTest.cpp +++ b/test/AddCombinedRowToTableTest.cpp @@ -323,3 +323,39 @@ TEST(AddCombinedRowToTable, verifyLocalVocabIsRetainedWhenNotMoving) { EXPECT_TRUE(vocabContainsString(localVocab, "b")); EXPECT_THAT(localVocab.getAllWordsForTesting(), ::testing::SizeIs(2)); } + +// _____________________________________________________________________________ +TEST(AddCombinedRowToTable, localVocabIsOnlyClearedWhenLegal) { + auto outputTable = makeIdTableFromVector({}); + outputTable.setNumColumns(3); + ad_utility::AddCombinedRowToIdTable adder{ + 1, std::move(outputTable), + std::make_shared>(), 1}; + + IdTableWithVocab input1{makeIdTableFromVector({{0, 1}}), + createVocabWithSingleString("a")}; + IdTableWithVocab input2{makeIdTableFromVector({{0, 2}}), + createVocabWithSingleString("b")}; + + adder.setInput(input1, input2); + adder.addRow(0, 0); + IdTableWithVocab input3{makeIdTableFromVector({{3, 1}}), + createVocabWithSingleString("c")}; + IdTableWithVocab input4{makeIdTableFromVector({{3, 2}}), + createVocabWithSingleString("d")}; + // NOTE: This seemingly redundant call to `setInput` is important, as it tests + // a previous bug: Each call to `setInput` implicitly also calls `flush` and + // also possibly clears the local vocab if it is not used anymore. In this + // case however we may not clear the local vocab, as the result of the + // previous calls to `addRow` has not yet been extracted. + adder.setInput(input1, input2); + adder.setInput(input3, input4); + adder.addRow(0, 0); + auto localVocab = adder.localVocab().clone(); + + EXPECT_TRUE(vocabContainsString(localVocab, "a")); + EXPECT_TRUE(vocabContainsString(localVocab, "b")); + EXPECT_TRUE(vocabContainsString(localVocab, "c")); + EXPECT_TRUE(vocabContainsString(localVocab, "d")); + EXPECT_THAT(localVocab.getAllWordsForTesting(), ::testing::SizeIs(4)); +}