diff --git a/src/yb/common/hybrid_time.h b/src/yb/common/hybrid_time.h index 4c9b43164216..6220b645f760 100644 --- a/src/yb/common/hybrid_time.h +++ b/src/yb/common/hybrid_time.h @@ -164,6 +164,8 @@ class HybridTime { default: return false; } + LOG(FATAL) << "Should never happen"; + return false; // Never reached. } bool operator <(const HybridTime& other) const { @@ -242,6 +244,16 @@ inline std::ostream &operator <<(std::ostream &o, const HybridTime &hybridTime) return o << hybridTime.ToString(); } +namespace hybrid_time_literals { + +inline HybridTime operator "" _usec_ht(unsigned long long microseconds) { // NOLINT + return HybridTime::FromMicros(microseconds); +} + +} // namespace hybrid_time_literals + +using hybrid_time_literals::operator"" _usec_ht; + } // namespace yb #endif // YB_COMMON_HYBRID_TIME_H_ diff --git a/src/yb/docdb/doc_operation-test.cc b/src/yb/docdb/doc_operation-test.cc index fe16d111fa0c..43919d2cbeb7 100644 --- a/src/yb/docdb/doc_operation-test.cc +++ b/src/yb/docdb/doc_operation-test.cc @@ -548,7 +548,7 @@ void DocOperationRangeFilterTest::TestWithSortingType(ColumnSchema::SortingType { row.k, row.r, row.v }, 1000, t); - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); } std::vector live_files; rocksdb()->GetLiveFilesMetaData(&live_files); @@ -651,7 +651,7 @@ SubDocKey(DocKey(0x0000, [1], []), [ColumnId(2); HT{ physical: 1000 w: 2 }]) -> SubDocKey(DocKey(0x0000, [1], []), [ColumnId(3); HT{ physical: 1000 w: 3 }]) -> 3; ttl: 1.000s )#"); - CompactHistoryBefore(t1); + FullyCompactHistoryBefore(t1); // Verify all entries are purged. AssertDocDbDebugDumpStrEq(R"#( @@ -697,7 +697,7 @@ SubDocKey(DocKey(0x0000, [1], []), [ColumnId(3); HT{ physical: 1000 logical: 1 w SubDocKey(DocKey(0x0000, [1], []), [ColumnId(3); HT{ physical: 1000 w: 3 }]) -> 3; ttl: 1.000s )#"); - CompactHistoryBefore(t1); + FullyCompactHistoryBefore(t1); // Verify the rest of the columns still live. AssertDocDbDebugDumpStrEq(R"#( @@ -746,7 +746,7 @@ size_t GenerateFiles(int total_batches, DocOperationTest* test) { test->WriteQLRow( QLWriteRequestPB_QLStmtType_QL_STMT_INSERT, schema, {j, j, j, j}, 1000000, t0); } - EXPECT_OK(test->FlushRocksDB()); + EXPECT_OK(test->FlushRocksDbAndWait()); } return expected_files; } diff --git a/src/yb/docdb/docdb-test.cc b/src/yb/docdb/docdb-test.cc index 25abfc16c573..eae384388cec 100644 --- a/src/yb/docdb/docdb-test.cc +++ b/src/yb/docdb/docdb-test.cc @@ -37,6 +37,7 @@ #include "yb/util/string_trim.h" #include "yb/util/test_macros.h" #include "yb/util/test_util.h" +#include "yb/util/strongly_typed_bool.h" using std::cout; using std::endl; @@ -60,12 +61,17 @@ DECLARE_int32(max_nexts_to_avoid_seek); namespace yb { namespace docdb { +using PV = PrimitiveValue; + CHECKED_STATUS GetPrimitiveValue(const rocksdb::UserBoundaryValues &values, size_t index, PrimitiveValue *out); CHECKED_STATUS GetDocHybridTime(const rocksdb::UserBoundaryValues &values, DocHybridTime *out); -class DocDBTest: public DocDBTestBase { +YB_STRONGLY_TYPED_BOOL(InitMarkerExpired); +YB_STRONGLY_TYPED_BOOL(UseIntermediateFlushes); + +class DocDBTest : public DocDBTestBase { protected: DocDBTest() { SeedRandom(); @@ -186,19 +192,19 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_d"; HT{ physica #undef SET_VALUE ASSERT_OK(InsertSubDocument( - DocPath(encoded_doc_key), root, HybridTime::FromMicros(1000))); + DocPath(encoded_doc_key), root, 1000_usec_ht)); // The Insert above could have been an Extend with no difference in external behavior. 
// Internally however, an insert writes an extra key (with value tombstone). ASSERT_OK(SetPrimitive( DocPath(encoded_doc_key, PrimitiveValue("a"), PrimitiveValue("2")), - Value(PrimitiveValue(11)), HybridTime::FromMicros(2000))); + Value(PrimitiveValue(11)), 2000_usec_ht)); ASSERT_OK(InsertSubDocument(DocPath(encoded_doc_key, PrimitiveValue("b")), b2, - HybridTime::FromMicros(3000))); + 3000_usec_ht)); ASSERT_OK(ExtendSubDocument(DocPath(encoded_doc_key, PrimitiveValue("a")), f, - HybridTime::FromMicros(4000))); + 4000_usec_ht)); ASSERT_OK(SetPrimitive( DocPath(encoded_doc_key, PrimitiveValue("b"), PrimitiveValue("e"), PrimitiveValue("2")), - Value(PrimitiveValue::kTombstone), HybridTime::FromMicros(5000))); + Value(PrimitiveValue::kTombstone), 5000_usec_ht)); } void VerifySubDocument(SubDocKey subdoc_key, HybridTime ht, string subdoc_string) { @@ -262,8 +268,8 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_d"; HT{ physica const DocKey doc_key(PrimitiveValues("mydockey")); KeyBytes encoded_doc_key(doc_key.Encode()); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue(GetInetAddress(strval))), - PrimitiveValue(), - HybridTime::FromMicros(1000))); + PrimitiveValue(), + 1000_usec_ht)); } // Inserts a bunch of subkeys starting with the provided doc key. It also, fills out the @@ -283,6 +289,86 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_d"; HT{ physica *expected_docdb_str += "\n"; } } + + static constexpr int kNumSubKeysForCollectionsWithTTL = 3; + + void SetUpCollectionWithTTL(DocKey collection_key, UseIntermediateFlushes intermediate_flushes) { + SubDocument subdoc; + for (int i = 0; i < kNumSubKeysForCollectionsWithTTL; i++) { + string key = "k" + std::to_string(i); + string value = "v" + std::to_string(i); + subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); + } + ASSERT_OK(InsertSubDocument(DocPath(collection_key.Encode()), subdoc, 1000_usec_ht, 10s)); + + AssertDocDbDebugDumpStrEq(Format(R"#( + SubDocKey($0, [HT{ physical: 1000 }]) -> {}; ttl: 10.000s + SubDocKey($0, ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s + SubDocKey($0, ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s + SubDocKey($0, ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s + )#", collection_key.ToString())); + if (intermediate_flushes) { + ASSERT_OK(FlushRocksDbAndWait()); + } + + // Set separate TTLs for each element. + for (int i = 0; i < kNumSubKeysForCollectionsWithTTL; i++) { + SubDocument subdoc; + string key = "k" + std::to_string(i); + string value = "vv" + std::to_string(i); + subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); + ASSERT_OK(ExtendSubDocument( + DocPath(collection_key.Encode()), subdoc, 1100_usec_ht, + MonoDelta::FromSeconds(20 + i))); + if (intermediate_flushes) { + ASSERT_OK(FlushRocksDbAndWait()); + } + } + + // Add new keys as well. + for (int i = kNumSubKeysForCollectionsWithTTL; i < kNumSubKeysForCollectionsWithTTL * 2; i++) { + SubDocument subdoc; + string key = "k" + std::to_string(i); + string value = "vv" + std::to_string(i); + subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); + ASSERT_OK(ExtendSubDocument( + DocPath(collection_key.Encode()), subdoc, 1100_usec_ht, + MonoDelta::FromSeconds(20 + i))); + if (intermediate_flushes) { + ASSERT_OK(FlushRocksDbAndWait()); + } + } + } + + string ExpectedDebugDumpForCollectionWithTTL(DocKey collection_key, + InitMarkerExpired init_marker_expired) { + // The "file ..." 
comments below are for the case of intermediate_flushes = true above.
+    const string result_template = init_marker_expired ?
+        // After the init marker expires, we should not see a tombstone for it. We do not replace
+        // timed-out collection init markers with tombstones on minor compactions, because that
+        // could hide keys that are still valid: even if the init marker for a collection has
+        // expired, individual elements in the collection might still be live.
+        R"#(
+    SubDocKey($0, ["k0"; HT{ physical: 1100 }]) -> "vv0"; ttl: 20.000s
+    SubDocKey($0, ["k1"; HT{ physical: 1100 }]) -> "vv1"; ttl: 21.000s
+    SubDocKey($0, ["k2"; HT{ physical: 1100 }]) -> "vv2"; ttl: 22.000s
+    SubDocKey($0, ["k3"; HT{ physical: 1100 }]) -> "vv3"; ttl: 23.000s
+    SubDocKey($0, ["k4"; HT{ physical: 1100 }]) -> "vv4"; ttl: 24.000s
+    SubDocKey($0, ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s
+        )#" : R"#(
+    SubDocKey($0, [HT{ physical: 1000 }]) -> {}; ttl: 10.000s  // file 1
+    SubDocKey($0, ["k0"; HT{ physical: 1100 }]) -> "vv0"; ttl: 20.000s  // file 2
+    SubDocKey($0, ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s  // file 1
+    SubDocKey($0, ["k1"; HT{ physical: 1100 }]) -> "vv1"; ttl: 21.000s  // file 3
+    SubDocKey($0, ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s  // file 1
+    SubDocKey($0, ["k2"; HT{ physical: 1100 }]) -> "vv2"; ttl: 22.000s  // file 4
+    SubDocKey($0, ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s  // file 1
+    SubDocKey($0, ["k3"; HT{ physical: 1100 }]) -> "vv3"; ttl: 23.000s  // file 5
+    SubDocKey($0, ["k4"; HT{ physical: 1100 }]) -> "vv4"; ttl: 24.000s  // file 6
+    SubDocKey($0, ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s  // file 7
+        )#";
+    return Format(result_template, collection_key.ToString());
+  }
+
 };
 
 class DocDBTestWithoutBlockCache: public DocDBTest {
@@ -381,21 +467,21 @@ TEST_F(DocDBTest, HistoryCompactionFirstRowHandlingRegression) {
   const DocKey doc_key(PrimitiveValues("mydockey", 123456));
   KeyBytes encoded_doc_key(doc_key.Encode());
   ASSERT_OK(SetPrimitive(
-      DocPath(encoded_doc_key), PrimitiveValue::kObject, HybridTime::FromMicros(1000)));
+      DocPath(encoded_doc_key), PrimitiveValue::kObject, 1000_usec_ht));
   ASSERT_OK(SetPrimitive(
-      DocPath(encoded_doc_key, PrimitiveValue("subkey1")),
+      DocPath(encoded_doc_key, "subkey1"),
       PrimitiveValue("value1"),
-      HybridTime::FromMicros(1000)));
+      1000_usec_ht));
   ASSERT_OK(SetPrimitive(
-      DocPath(encoded_doc_key, PrimitiveValue("subkey1")),
+      DocPath(encoded_doc_key, "subkey1"),
       PrimitiveValue("value2"),
-      HybridTime::FromMicros(2000)));
+      2000_usec_ht));
   ASSERT_OK(SetPrimitive(
-      DocPath(encoded_doc_key, PrimitiveValue("subkey1")),
+      DocPath(encoded_doc_key, "subkey1"),
       PrimitiveValue("value3"),
-      HybridTime::FromMicros(3000)));
+      3000_usec_ht));
   ASSERT_OK(SetPrimitive(
-      DocPath(encoded_doc_key), PrimitiveValue::kObject, HybridTime::FromMicros(4000)));
+      DocPath(encoded_doc_key), PrimitiveValue::kObject, 4000_usec_ht));
   AssertDocDbDebugDumpStrEq(R"#(
SubDocKey(DocKey([], ["mydockey", 123456]), [HT{ physical: 4000 }]) -> {}
SubDocKey(DocKey([], ["mydockey", 123456]), [HT{ physical: 1000 }]) -> {}
@@ -403,7 +489,7 @@ TEST_F(DocDBTest, HistoryCompactionFirstRowHandlingRegression) {
 SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey1"; HT{ physical: 2000 }]) -> "value2"
 SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey1"; HT{ physical: 1000 }]) -> "value1"
       )#");
-  CompactHistoryBefore(HybridTime::FromMicros(3500));
+  FullyCompactHistoryBefore(3500_usec_ht);
   AssertDocDbDebugDumpStrEq(
       R"#(
SubDocKey(DocKey([], ["mydockey", 123456]), [HT{ physical: 4000 }]) -> {}
@@ -443,10 +529,10 @@ TEST_F(DocDBTest, GetSubDocumentTest) {
   // We will
test the state of the entire document after every operation, using timestamps // 500, 1500, 2500, 3500, 4500, 5500. - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(500), ""); + VerifySubDocument(SubDocKey(doc_key), 500_usec_ht, ""); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(1500), - R"#( + VerifySubDocument(SubDocKey(doc_key), 1500_usec_ht, + R"#( { "a": { "1": "1", @@ -465,8 +551,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(2500), - R"#( + VerifySubDocument(SubDocKey(doc_key), 2500_usec_ht, + R"#( { "a": { "1": "1", @@ -485,8 +571,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(3500), - R"#( + VerifySubDocument(SubDocKey(doc_key), 3500_usec_ht, + R"#( { "a": { "1": "1", @@ -503,8 +589,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(4500), - R"#( + VerifySubDocument(SubDocKey(doc_key), 4500_usec_ht, + R"#( { "a": { "1": "3", @@ -522,8 +608,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(5500), - R"#( + VerifySubDocument(SubDocKey(doc_key), 5500_usec_ht, + R"#( { "a": { "1": "3", @@ -542,10 +628,10 @@ TEST_F(DocDBTest, GetSubDocumentTest) { // Test the evolution of SubDoc root.b at various timestamps. - VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), HybridTime::FromMicros(500), ""); + VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), 500_usec_ht, ""); - VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), HybridTime::FromMicros(2500), - R"#( + VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), 2500_usec_ht, + R"#( { "c": { "1": "3" @@ -557,8 +643,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), HybridTime::FromMicros(3500), - R"#( + VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), 3500_usec_ht, + R"#( { "e": { "1": "8", @@ -568,8 +654,8 @@ TEST_F(DocDBTest, GetSubDocumentTest) { } )#"); - VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), HybridTime::FromMicros(5500), - R"#( + VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b")), 5500_usec_ht, + R"#( { "e": { "1": "8" @@ -579,11 +665,11 @@ TEST_F(DocDBTest, GetSubDocumentTest) { )#"); VerifySubDocument(SubDocKey( - doc_key, PrimitiveValue("b"), PrimitiveValue("d")), HybridTime::FromMicros(10000), ""); + doc_key, PrimitiveValue("b"), PrimitiveValue("d")), 10000_usec_ht, ""); VerifySubDocument(SubDocKey(doc_key, PrimitiveValue("b"), PrimitiveValue("d")), - HybridTime::FromMicros(2500), - R"#( + 2500_usec_ht, + R"#( { "1": "5", "2": "6" @@ -847,7 +933,7 @@ TEST_F(DocDBTest, ExpiredValueCompactionTest) { const DocKey doc_key(PrimitiveValues("k1")); const MonoDelta one_ms = 1ms; const MonoDelta two_ms = 2ms; - const HybridTime t0 = HybridTime::FromMicros(1000); + const HybridTime t0 = 1000_usec_ht; HybridTime t1 = server::HybridClock::AddPhysicalTimeToHybridTime(t0, two_ms); HybridTime t2 = server::HybridClock::AddPhysicalTimeToHybridTime(t1, two_ms); KeyBytes encoded_doc_key(doc_key.Encode()); @@ -867,7 +953,7 @@ TEST_F(DocDBTest, ExpiredValueCompactionTest) { SubDocKey(DocKey([], ["k1"]), ["s2"; HT{ physical: 5000 }]) -> "v24" SubDocKey(DocKey([], ["k1"]), ["s2"; HT{ physical: 1000 }]) -> "v21"; ttl: 0.003s )#"); - CompactHistoryBefore(t1); + FullyCompactHistoryBefore(t1); AssertDocDbDebugDumpStrEq( R"#( 
SubDocKey(DocKey([], ["k1"]), ["s1"; HT{ physical: 5000 }]) -> "v14" @@ -879,7 +965,7 @@ SubDocKey(DocKey([], ["k1"]), ["s2"; HT{ physical: 1000 }]) -> "v21"; ttl: 0.003 TEST_F(DocDBTest, TTLCompactionTest) { const DocKey doc_key(PrimitiveValues("k1")); const MonoDelta one_ms = 1ms; - const HybridTime t0 = HybridTime::FromMicros(1000); + const HybridTime t0 = 1000_usec_ht; HybridTime t1 = server::HybridClock::AddPhysicalTimeToHybridTime(t0, one_ms); HybridTime t2 = server::HybridClock::AddPhysicalTimeToHybridTime(t1, one_ms); HybridTime t3 = server::HybridClock::AddPhysicalTimeToHybridTime(t2, one_ms); @@ -919,7 +1005,7 @@ SubDocKey(DocKey([], ["k2"]), [ColumnId(0); HT{ physical: 1000 }]) -> "v1"; ttl: SubDocKey(DocKey([], ["k2"]), [ColumnId(1); HT{ physical: 1000 }]) -> "v2"; ttl: 0.001s )#"); - CompactHistoryBefore(t2); + FullyCompactHistoryBefore(t2); // Liveness column is gone for row1, v2 gone for row2. AssertDocDbDebugDumpStrEq( @@ -932,7 +1018,7 @@ SubDocKey(DocKey([], ["k2"]), [SystemColumnId(0); HT{ physical: 1000 }]) -> null SubDocKey(DocKey([], ["k2"]), [ColumnId(0); HT{ physical: 1000 }]) -> "v1"; ttl: 0.002s )#"); - CompactHistoryBefore(t3); + FullyCompactHistoryBefore(t3); // v1 is gone. AssertDocDbDebugDumpStrEq( @@ -943,7 +1029,7 @@ SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 1000 }]) -> "v4" SubDocKey(DocKey([], ["k2"]), [SystemColumnId(0); HT{ physical: 1000 }]) -> null; ttl: 0.003s )#"); - CompactHistoryBefore(t4); + FullyCompactHistoryBefore(t4); // v2 is gone for row 1, liveness column gone for row 2. AssertDocDbDebugDumpStrEq( R"#( @@ -966,7 +1052,7 @@ SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 2000 }]) -> DEL SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 1000 }]) -> "v4" )#"); - CompactHistoryBefore(t0); + FullyCompactHistoryBefore(t0); // Nothing is removed. AssertDocDbDebugDumpStrEq( R"#( @@ -976,7 +1062,7 @@ SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 2000 }]) -> DEL SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 1000 }]) -> "v4" )#"); - CompactHistoryBefore(t1); + FullyCompactHistoryBefore(t1); // Next compactions removes everything. 
AssertDocDbDebugDumpStrEq( R"#( @@ -985,21 +1071,20 @@ SubDocKey(DocKey([], ["k1"]), [ColumnId(3); HT{ physical: 1000 }]) -> "v4" TEST_F(DocDBTest, TableTTLCompactionTest) { const DocKey doc_key(PrimitiveValues("k1")); - const MonoDelta one_ms = 1ms; - const HybridTime t0 = HybridTime::FromMicros(1000); - HybridTime t1 = server::HybridClock::AddPhysicalTimeToHybridTime(t0, one_ms); - HybridTime t2 = server::HybridClock::AddPhysicalTimeToHybridTime(t1, one_ms); - HybridTime t3 = server::HybridClock::AddPhysicalTimeToHybridTime(t2, one_ms); - HybridTime t4 = server::HybridClock::AddPhysicalTimeToHybridTime(t3, one_ms); + const HybridTime t1 = 1000_usec_ht; + const HybridTime t2 = 2000_usec_ht; + const HybridTime t3 = 3000_usec_ht; + const HybridTime t4 = 4000_usec_ht; + const HybridTime t5 = 5000_usec_ht; KeyBytes encoded_doc_key(doc_key.Encode()); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s1")), - Value(PrimitiveValue("v1"), 1ms), t0)); + Value(PrimitiveValue("v1"), 1ms), t1)); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s2")), - Value(PrimitiveValue("v2"), Value::kMaxTtl), t0)); + Value(PrimitiveValue("v2"), Value::kMaxTtl), t1)); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s3")), - Value(PrimitiveValue("v3"), 0ms), t1)); + Value(PrimitiveValue("v3"), 0ms), t2)); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s4")), - Value(PrimitiveValue("v4"), 3ms), t0)); + Value(PrimitiveValue("v4"), 3ms), t1)); // Note: HT{ physical: 1000 } + 1ms = HT{ physical: 4097000 } AssertDocDbDebugDumpStrEq(R"#( SubDocKey(DocKey([], ["k1"]), ["s1"; HT{ physical: 1000 }]) -> "v1"; ttl: 0.001s @@ -1008,7 +1093,7 @@ TEST_F(DocDBTest, TableTTLCompactionTest) { SubDocKey(DocKey([], ["k1"]), ["s4"; HT{ physical: 1000 }]) -> "v4"; ttl: 0.003s )#"); SetTableTTL(2); - CompactHistoryBefore(t2); + FullyCompactHistoryBefore(t3); // v1 compacted due to column level ttl. AssertDocDbDebugDumpStrEq( @@ -1018,7 +1103,7 @@ SubDocKey(DocKey([], ["k1"]), ["s3"; HT{ physical: 2000 }]) -> "v3"; ttl: 0.000s SubDocKey(DocKey([], ["k1"]), ["s4"; HT{ physical: 1000 }]) -> "v4"; ttl: 0.003s )#"); - CompactHistoryBefore(t3); + FullyCompactHistoryBefore(t4); // v2 compacted due to table level ttl. // init marker compacted due to table level ttl. AssertDocDbDebugDumpStrEq( @@ -1027,7 +1112,7 @@ SubDocKey(DocKey([], ["k1"]), ["s3"; HT{ physical: 2000 }]) -> "v3"; ttl: 0.000s SubDocKey(DocKey([], ["k1"]), ["s4"; HT{ physical: 1000 }]) -> "v4"; ttl: 0.003s )#"); - CompactHistoryBefore(t4); + FullyCompactHistoryBefore(t5); // v4 compacted due to column level ttl. // v3 stays forever due to ttl being set to 0. 
AssertDocDbDebugDumpStrEq(
      R"#(
@@ -1036,6 +1121,155 @@ SubDocKey(DocKey([], ["k1"]), ["s3"; HT{ physical: 2000 }]) -> "v3"; ttl: 0.000s
 )#");
 }
 
+TEST_F(DocDBTest, MinorCompactionNoDeletions) {
+  ASSERT_OK(DisableCompactions());
+  const DocKey doc_key(PrimitiveValues("k"));
+  KeyBytes encoded_doc_key(doc_key.Encode());
+  for (int i = 1; i <= 6; ++i) {
+    auto value_str = Format("v$0", i);
+    PV pv = PV(value_str);
+    ASSERT_OK(SetPrimitive(
+        DocPath(encoded_doc_key), Value(pv), HybridTime::FromMicros(i * 1000)));
+    ASSERT_OK(FlushRocksDbAndWait());
+  }
+
+  ASSERT_EQ(6, NumSSTableFiles());
+  const char* kInitialDocDbStateStr = R"#(
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6"  // file 6
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> "v5"  // file 5
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 4000 }]) -> "v4"  // file 4
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 3000 }]) -> "v3"  // file 3
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2"  // file 2
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1"  // file 1
+      )#";
+
+  AssertDocDbDebugDumpStrEq(kInitialDocDbStateStr);
+  MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2);
+
+  ASSERT_EQ(5, NumSSTableFiles());
+  // No changes in DocDB rows as we still need the entry at 5000_usec_ht.
+  // Let's call the output file resulting from the last compaction "file 7".
+  AssertDocDbDebugDumpStrEq(kInitialDocDbStateStr);
+
+  MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2);
+  ASSERT_EQ(4, NumSSTableFiles());
+  // Removed the entry at 4000_usec_ht as it was overwritten at time 5000. Earlier entries are in
+  // other files that haven't been compacted yet.
+  AssertDocDbDebugDumpStrEq(
+      R"#(
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6"  // file 8
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> "v5"  // file 8
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 3000 }]) -> "v3"  // file 3
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2"  // file 2
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1"  // file 1
+      )#");
+
+  MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2);
+  ASSERT_EQ(3, NumSSTableFiles());
+  // Removed the entry at 3000_usec_ht.
+  AssertDocDbDebugDumpStrEq(
+      R"#(
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6"  // file 9
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> "v5"  // file 9
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2"  // file 2
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1"  // file 1
+      )#");
+
+  MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2);
+  ASSERT_EQ(2, NumSSTableFiles());
+  // Removed the entry at 2000_usec_ht.
+  AssertDocDbDebugDumpStrEq(
+      R"#(
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6"  // file 10
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> "v5"  // file 10
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1"  // file 1
+      )#");
+
+  MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2);
+  ASSERT_EQ(1, NumSSTableFiles());
+  // Removed the entry at 1000_usec_ht.
+  AssertDocDbDebugDumpStrEq(
+      R"#(
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6"  // file 11
+SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> "v5"  // file 11
+      )#");
+}
+
+TEST_F(DocDBTest, MinorCompactionWithDeletions) {
+  ASSERT_OK(DisableCompactions());
+  const DocKey doc_key(PrimitiveValues("k"));
+  KeyBytes encoded_doc_key(doc_key.Encode());
+  for (int i = 1; i <= 6; ++i) {
+    auto value_str = Format("v$0", i);
+    PV pv = i == 5 ?
PV::kTombstone : PV(value_str); + ASSERT_OK(SetPrimitive( + DocPath(encoded_doc_key), Value(pv), HybridTime::FromMicros(i * 1000))); + ASSERT_OK(FlushRocksDbAndWait()); + } + + ASSERT_EQ(6, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 6 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> DEL // file 5 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 4000 }]) -> "v4" // file 4 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 3000 }]) -> "v3" // file 3 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2" // file 2 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1" // file 1 + )#"); + MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2); + + ASSERT_EQ(5, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 7 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> DEL // file 7 as well +SubDocKey(DocKey([], ["k"]), [HT{ physical: 4000 }]) -> "v4" // file 4 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 3000 }]) -> "v3" // file 3 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2" // file 2 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1" // file 1 + )#"); + + MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2); + ASSERT_EQ(4, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 8 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> DEL // file 8 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 3000 }]) -> "v3" // file 3 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2" // file 2 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1" // file 1 + )#"); + + MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2); + ASSERT_EQ(3, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 9 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 2000 }]) -> "v2" // file 2 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1" // file 1 + )#"); + + MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2); + ASSERT_EQ(2, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 10 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 5000 }]) -> DEL // file 10 +SubDocKey(DocKey([], ["k"]), [HT{ physical: 1000 }]) -> "v1" // file 1 + )#"); + + // Now the minor compaction turns into a major compaction and we end up with one file. + // The tombstone is now gone as well. + MinorCompaction(5000_usec_ht, /* num_files_to_compact */ 2); + ASSERT_EQ(1, NumSSTableFiles()); + AssertDocDbDebugDumpStrEq( + R"#( +SubDocKey(DocKey([], ["k"]), [HT{ physical: 6000 }]) -> "v6" // file 11 + )#"); +} + TEST_F(DocDBTest, BasicTest) { // A few points to make it easier to understand the expected binary representations here: // - Initial bytes such as 'S' (kString), 'I' (kInt64) correspond to members of the enum @@ -1061,7 +1295,7 @@ TEST_F(DocDBTest, BasicTest) { TestInsertion( DocPath(string_valued_doc_key.Encode()), PrimitiveValue("value1"), - HybridTime::FromMicros(1000), + 1000_usec_ht, R"#(1. 
PutCF('Smy_key_where_value_is_a_string\x00\x00\ !', 'Svalue1'))#"); @@ -1071,7 +1305,7 @@ TEST_F(DocDBTest, BasicTest) { TestInsertion( DocPath(encoded_doc_key, "subkey_a"), PrimitiveValue("value_a"), - HybridTime::FromMicros(2000), + 2000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1085,7 +1319,7 @@ TEST_F(DocDBTest, BasicTest) { TestInsertion( DocPath(encoded_doc_key, "subkey_b", "subkey_c"), PrimitiveValue("value_bc"), - HybridTime::FromMicros(3000), + 3000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1102,7 +1336,7 @@ TEST_F(DocDBTest, BasicTest) { TestInsertion( DocPath(encoded_doc_key, "subkey_b", "subkey_d"), PrimitiveValue("value_bd"), - HybridTime::FromMicros(3500), + 3500_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1114,13 +1348,13 @@ TEST_F(DocDBTest, BasicTest) { // Delete a non-existent top-level document. We don't expect any tombstones to be created. TestDeletion( DocPath(encoded_doc_key, "subkey_x"), - HybridTime::FromMicros(4000), + 4000_usec_ht, ""); // Delete a leaf-level value in a subdocument. TestDeletion( DocPath(encoded_doc_key, "subkey_b", "subkey_c"), - HybridTime::FromMicros(5000), + 5000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1132,7 +1366,7 @@ TEST_F(DocDBTest, BasicTest) { // Now delete an entire object. TestDeletion( DocPath(encoded_doc_key, "subkey_b"), - HybridTime::FromMicros(6000), + 6000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1145,7 +1379,7 @@ TEST_F(DocDBTest, BasicTest) { TestInsertion( DocPath(encoded_doc_key, "subkey_b", "subkey_c"), PrimitiveValue("value_bc_prime"), - HybridTime::FromMicros(7000), + 7000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ @@ -1166,7 +1400,7 @@ TEST_F(DocDBTest, BasicTest) { ClearLogicalSnapshots(); CaptureLogicalSnapshot(); - CompactHistoryBefore(HybridTime::FromMicros(5000)); + FullyCompactHistoryBefore(5000_usec_ht); // The following entry gets deleted because it is invisible at hybrid_time 5000: // SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_c"; HT{ physical: 3000 }]) // -> "value_bc" @@ -1193,7 +1427,7 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_d"; HT{ physica // state with the first history compaction (at hybrid_time 5000) already performed. for (const auto &snapshot : logical_snapshots()) { snapshot.RestoreTo(rocksdb()); - CompactHistoryBefore(HybridTime::FromMicros(6000)); + FullyCompactHistoryBefore(6000_usec_ht); // Now the following entries get deleted, because the entire subdocument at "subkey_b" gets // deleted at hybrid_time 6000, so we won't look at these records if we do a scan at // HT{ physical: 6000 }: @@ -1229,14 +1463,14 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_c"; HT{ physica TestInsertion( DocPath(encoded_doc_key), PrimitiveValue::kObject, - HybridTime::FromMicros(8000), + 8000_usec_ht, R"#( 1. PutCF('Smydockey\x00\x00\ I\x80\x00\x00\x00\x00\x01\xe2@\ !', '{') )#"); - VerifySubDocument(SubDocKey(doc_key), HybridTime::FromMicros(8000), "{}"); + VerifySubDocument(SubDocKey(doc_key), 8000_usec_ht, "{}"); } // Reset our collection of snapshots now that we've performed one more operation. 
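[Editor's note: the `_usec_ht` user-defined literal introduced in src/yb/common/hybrid_time.h above is what all of these shortened call sites rely on. A minimal sketch of the equivalence, assuming only the definitions from that header; this snippet is illustrative and not part of the patch:

    // Both spellings construct the same HybridTime from a physical time
    // expressed in microseconds.
    using namespace yb;
    const HybridTime a = HybridTime::FromMicros(8000);
    const HybridTime b = 8000_usec_ht;
    CHECK_EQ(a, b);

Because hybrid_time.h also declares `using hybrid_time_literals::operator"" _usec_ht;` inside namespace yb, test code in namespace yb::docdb picks the literal up without any extra using-declaration.]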
@@ -1262,7 +1496,7 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_d"; HT{ physica "value_bd" )#"); - CompactHistoryBefore(HybridTime::FromMicros(7999)); + FullyCompactHistoryBefore(7999_usec_ht); AssertDocDbDebugDumpStrEq(R"#( SubDocKey(DocKey([], ["my_key_where_value_is_a_string"]), [HT{ physical: 1000 }]) -> "value1" SubDocKey(DocKey([], ["mydockey", 123456]), [HT{ physical: 8000 }]) -> {} @@ -1278,7 +1512,7 @@ SubDocKey(DocKey([], ["mydockey", 123456]), ["subkey_b", "subkey_c"; HT{ physica // same result. for (int i = 0; i < logical_snapshots().size(); ++i) { RestoreToRocksDBLogicalSnapshot(i); - CompactHistoryBefore(HybridTime::FromMicros(8000)); + FullyCompactHistoryBefore(8000_usec_ht); AssertDocDbDebugDumpStrEq(R"#( SubDocKey(DocKey([], ["my_key_where_value_is_a_string"]), [HT{ physical: 1000 }]) -> "value1" SubDocKey(DocKey([], ["mydockey", 123456]), [HT{ physical: 8000 }]) -> {} @@ -1293,7 +1527,7 @@ TEST_F(DocDBTest, MultiOperationDocWriteBatch) { ASSERT_OK(dwb.SetPrimitive(DocPath(encoded_doc_key, "c", "d"), PrimitiveValue("v2"))); ASSERT_OK(dwb.SetPrimitive(DocPath(encoded_doc_key, "c", "e"), PrimitiveValue("v3"))); - ASSERT_OK(WriteToRocksDB(dwb, HybridTime::FromMicros(1000))); + ASSERT_OK(WriteToRocksDB(dwb, 1000_usec_ht)); AssertDocDbDebugDumpStrEq(R"#( SubDocKey(DocKey([], ["a"]), ["b"; HT{ physical: 1000 }]) -> "v1" @@ -1331,7 +1565,7 @@ class DocDBTestBoundaryValues: public DocDBTest { for (int i = 0; i != kTotalRows; ++i) { if (i % flush_rate == 0) { trackers.emplace_back(); - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); } auto key_str = "key_" + std::to_string(distribution(rng)); auto key_int = distribution(rng); @@ -1348,8 +1582,8 @@ class DocDBTestBoundaryValues: public DocDBTest { string dwb_str; ASSERT_OK(FormatDocWriteBatch(dwb, &dwb_str)); SCOPED_TRACE("\nWrite batch:\n" + dwb_str); - ASSERT_OK(WriteToRocksDB(dwb, HybridTime::FromMicros(1000))); - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(WriteToRocksDB(dwb, 1000_usec_ht)); + ASSERT_OK(FlushRocksDbAndWait()); for (auto i = 0; i != 2; ++i) { if (i) { @@ -1406,7 +1640,7 @@ TEST_F(DocDBTest, BloomFilterTest) { FLAGS_max_nexts_to_avoid_seek = 0; // Write batch and flush options. auto dwb = MakeDocWriteBatch(); - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); DocKey key1(0, PrimitiveValues("key1"), PrimitiveValues()); DocKey key2(0, PrimitiveValues("key2"), PrimitiveValues()); @@ -1419,7 +1653,7 @@ TEST_F(DocDBTest, BloomFilterTest) { int total_table_iterators = 0; auto flush_rocksdb = [this, &total_table_iterators]() { - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); total_table_iterators = options().statistics->getTickerCount(rocksdb::NO_TABLE_CACHE_ITERATORS); }; @@ -1513,7 +1747,7 @@ TEST_F(DocDBTest, MergingIterator) { auto dwb = MakeDocWriteBatch(); ASSERT_OK(dwb.SetPrimitive(DocPath(key1.Encode()), PrimitiveValue("value1"))); ASSERT_OK(WriteToRocksDB(dwb, ht)); - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); // Put bigger key into memtable. 
DocKey key2(234, PrimitiveValues("key2"), PrimitiveValues()); @@ -1568,7 +1802,7 @@ TEST_F(DocDBTest, TestDisambiguationOnWriteId) { PrimitiveValue("value1"))); ASSERT_OK(dwb.SetPrimitive( DocPath(kEncodedDocKey1), PrimitiveValue::kTombstone)); - ASSERT_OK(WriteToRocksDBAndClear(&dwb, HybridTime::FromMicros(1000))); + ASSERT_OK(WriteToRocksDBAndClear(&dwb, 1000_usec_ht)); SubDocKey subdoc_key(kDocKey1); SubDocument subdoc; @@ -1585,7 +1819,7 @@ TEST_F(DocDBTest, TestDisambiguationOnWriteId) { // The row should still be absent after a compaction. // TODO(dtxn) - check both transaction and non-transaction path? - CompactHistoryBefore(HybridTime::FromMicros(cutoff_time_ms)); + FullyCompactHistoryBefore(HybridTime::FromMicros(cutoff_time_ms)); GetSubDocument(rocksdb(), data, rocksdb::kDefaultQueryId, kNonTransactionalOperationContext); ASSERT_FALSE(doc_found); AssertDocDbDebugDumpStrEq(""); @@ -1597,7 +1831,7 @@ TEST_F(DocDBTest, TestDisambiguationOnWriteId) { ASSERT_OK(dwb.SetPrimitive( DocPath(kEncodedDocKey2, PrimitiveValue(ColumnId(10))), PrimitiveValue("value2"))); - ASSERT_OK(WriteToRocksDBAndClear(&dwb, HybridTime::FromMicros(2000))); + ASSERT_OK(WriteToRocksDBAndClear(&dwb, 2000_usec_ht)); // TODO(dtxn) - check both transaction and non-transaction path? SubDocKey subdoc_key2(kDocKey2); auto encoded_subdoc_key2 = subdoc_key2.EncodeWithoutHt(); @@ -1609,7 +1843,7 @@ TEST_F(DocDBTest, TestDisambiguationOnWriteId) { CaptureLogicalSnapshot(); for (int cutoff_time_ms = 2000; cutoff_time_ms <= 2001; ++cutoff_time_ms) { RestoreToLastLogicalRocksDBSnapshot(); - CompactHistoryBefore(HybridTime::FromMicros(cutoff_time_ms)); + FullyCompactHistoryBefore(HybridTime::FromMicros(cutoff_time_ms)); // TODO(dtxn) - check both transaction and non-transaction path? GetSubDocument(rocksdb(), data, rocksdb::kDefaultQueryId, kNonTransactionalOperationContext); ASSERT_TRUE(doc_found); @@ -1629,7 +1863,7 @@ TEST_F(DocDBTest, StaticColumnCompaction) { const MonoDelta one_ms = 1ms; const MonoDelta two_ms = 2ms; - const HybridTime t0 = HybridTime::FromMicros(1000); + const HybridTime t0 = 1000_usec_ht; const HybridTime t1 = server::HybridClock::AddPhysicalTimeToHybridTime(t0, two_ms); const HybridTime t2 = server::HybridClock::AddPhysicalTimeToHybridTime(t1, two_ms); @@ -1698,7 +1932,7 @@ SubDocKey(DocKey(0x0000, ["h1"], ["r2"]), ["c8"; HT{ physical: 1000 }]) -> "v82" )#"); // Compact at t1 = HT{ physical: 3000 }. - CompactHistoryBefore(t1); + FullyCompactHistoryBefore(t1); // Verify after compaction: // s1 -> expired @@ -1728,11 +1962,11 @@ TEST_F(DocDBTest, TestUserTimestamp) { SetInitMarkerBehavior(InitMarkerBehavior::kRequired); ASSERT_NOK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s10")), Value(PrimitiveValue("v10"), Value::kMaxTtl, 1000), - HybridTime::FromMicros(1000))); + 1000_usec_ht)); SetInitMarkerBehavior(InitMarkerBehavior::kOptional); - HybridTime ht = HybridTime::FromMicros(10000); + HybridTime ht = 10000_usec_ht; // Use same doc_write_batch to test cache. 
auto doc_write_batch = MakeDocWriteBatch(); ASSERT_OK(doc_write_batch.SetPrimitive( @@ -1786,8 +2020,8 @@ SubDocKey(DocKey([], ["k1"]), ["s3", "s5"; HT{ physical: 10000 w: 1 }]) -> "v1"; TEST_F(DocDBTest, TestCompactionWithUserTimestamp) { const DocKey doc_key(PrimitiveValues("k1")); - HybridTime t3000 = HybridTime::FromMicros(3000); - HybridTime t5000 = HybridTime::FromMicros(5000); + HybridTime t3000 = 3000_usec_ht; + HybridTime t5000 = 5000_usec_ht; KeyBytes encoded_doc_key(doc_key.Encode()); ASSERT_OK(SetPrimitive(DocPath(encoded_doc_key, PrimitiveValue("s1")), Value(PrimitiveValue("v11")), t3000)); @@ -1814,7 +2048,7 @@ TEST_F(DocDBTest, TestCompactionWithUserTimestamp) { )#"); // Compaction takes away everything. - CompactHistoryBefore(t5000); + FullyCompactHistoryBefore(t5000); AssertDocDbDebugDumpStrEq(R"#( )#"); @@ -1844,7 +2078,7 @@ TEST_F(DocDBTest, TestCompactionWithUserTimestamp) { SubDocKey(DocKey([], ["k1"]), ["s2"; HT{ physical: 3000 }]) -> "v11"; ttl: 0.001s )#"); - CompactHistoryBefore(t5000); + FullyCompactHistoryBefore(t5000); AssertDocDbDebugDumpStrEq(R"#( SubDocKey(DocKey([], ["k1"]), ["s1"; HT{ physical: 3000 }]) -> "v13"; user_timestamp: 4000 @@ -1862,7 +2096,7 @@ TEST_F(DocDBTest, TestCompactionWithUserTimestamp) { void QueryBounds(const DocKey& doc_key, int lower, int upper, int base, rocksdb::DB* rocksdb, SubDocument* doc_from_rocksdb, bool* subdoc_found, const SubDocKey& subdoc_to_search) { - HybridTime ht = HybridTime::FromMicros(1000000); + HybridTime ht = 1000000_usec_ht; auto lower_key = SubDocKey(doc_key, PrimitiveValue("subkey" + std::to_string(base + lower))).EncodeWithoutHt(); SliceKeyBound lower_bound(lower_key, BoundType::kInclusiveLower); @@ -1959,71 +2193,17 @@ TEST_F(DocDBTest, TestBuildSubDocumentBounds) { } TEST_F(DocDBTest, TestCompactionForCollectionsWithTTL) { - SubDocument subdoc; DocKey collection_key(PrimitiveValues("collection")); - int num_subkeys = 3; + SetUpCollectionWithTTL(collection_key, UseIntermediateFlushes::kFalse); - for (int i = 0; i < num_subkeys; i++) { - string key = "k" + std::to_string(i); - string value = "v" + std::to_string(i); - subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); - } - ASSERT_OK(InsertSubDocument( - DocPath(collection_key.Encode()), subdoc, HybridTime::FromMicros(1000), 10s)); - - AssertDocDbDebugDumpStrEq(R"#( - SubDocKey(DocKey([], ["collection"]), [HT{ physical: 1000 }]) -> {}; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s - )#"); - - // Set separate TTLs for each element. - for (int i = 0; i < num_subkeys; i++) { - SubDocument subdoc; - string key = "k" + std::to_string(i); - string value = "vv" + std::to_string(i); - subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); - ASSERT_OK(ExtendSubDocument( - DocPath(collection_key.Encode()), subdoc, HybridTime::FromMicros(1100), - MonoDelta::FromSeconds(20 + i))); - } - - // Add new keys as well. 
- for (int i = num_subkeys; i < num_subkeys * 2; i++) { - SubDocument subdoc; - string key = "k" + std::to_string(i); - string value = "vv" + std::to_string(i); - subdoc.SetChildPrimitive(PrimitiveValue(key), PrimitiveValue(value)); - ASSERT_OK(ExtendSubDocument( - DocPath(collection_key.Encode()), subdoc, HybridTime::FromMicros(1100), - MonoDelta::FromSeconds(20 + i))); - } + AssertDocDbDebugDumpStrEq(ExpectedDebugDumpForCollectionWithTTL( + collection_key, InitMarkerExpired::kFalse)); - AssertDocDbDebugDumpStrEq(R"#( - SubDocKey(DocKey([], ["collection"]), [HT{ physical: 1000 }]) -> {}; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k0"; HT{ physical: 1100 }]) -> "vv0"; ttl: 20.000s - SubDocKey(DocKey([], ["collection"]), ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k1"; HT{ physical: 1100 }]) -> "vv1"; ttl: 21.000s - SubDocKey(DocKey([], ["collection"]), ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k2"; HT{ physical: 1100 }]) -> "vv2"; ttl: 22.000s - SubDocKey(DocKey([], ["collection"]), ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s - SubDocKey(DocKey([], ["collection"]), ["k3"; HT{ physical: 1100 }]) -> "vv3"; ttl: 23.000s - SubDocKey(DocKey([], ["collection"]), ["k4"; HT{ physical: 1100 }]) -> "vv4"; ttl: 24.000s - SubDocKey(DocKey([], ["collection"]), ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s - )#"); + FullyCompactHistoryBefore(HybridTime::FromMicros(1050 + 10 * 1000000)); + AssertDocDbDebugDumpStrEq(ExpectedDebugDumpForCollectionWithTTL( + collection_key, InitMarkerExpired::kTrue)); - CompactHistoryBefore(HybridTime::FromMicros(1050 + 10000000)); - AssertDocDbDebugDumpStrEq(R"#( - SubDocKey(DocKey([], ["collection"]), ["k0"; HT{ physical: 1100 }]) -> "vv0"; ttl: 20.000s - SubDocKey(DocKey([], ["collection"]), ["k1"; HT{ physical: 1100 }]) -> "vv1"; ttl: 21.000s - SubDocKey(DocKey([], ["collection"]), ["k2"; HT{ physical: 1100 }]) -> "vv2"; ttl: 22.000s - SubDocKey(DocKey([], ["collection"]), ["k3"; HT{ physical: 1100 }]) -> "vv3"; ttl: 23.000s - SubDocKey(DocKey([], ["collection"]), ["k4"; HT{ physical: 1100 }]) -> "vv4"; ttl: 24.000s - SubDocKey(DocKey([], ["collection"]), ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s - )#"); - - auto subdoc_key = SubDocKey(collection_key).EncodeWithoutHt(); + const auto subdoc_key = SubDocKey(collection_key).EncodeWithoutHt(); SubDocument doc_from_rocksdb; bool subdoc_found_in_rocksdb = false; GetSubDocumentData data = { subdoc_key, &doc_from_rocksdb, &subdoc_found_in_rocksdb }; @@ -2032,7 +2212,7 @@ TEST_F(DocDBTest, TestCompactionForCollectionsWithTTL) { ReadHybridTime::FromMicros(1200))); ASSERT_TRUE(subdoc_found_in_rocksdb); - for (int i = 0; i < num_subkeys * 2; i++) { + for (int i = 0; i < kNumSubKeysForCollectionsWithTTL * 2; i++) { SubDocument subdoc; string key = "k" + std::to_string(i); string value = "vv" + std::to_string(i); @@ -2040,5 +2220,49 @@ TEST_F(DocDBTest, TestCompactionForCollectionsWithTTL) { } } +TEST_F(DocDBTest, MinorCompactionsForCollectionsWithTTL) { + ASSERT_OK(DisableCompactions()); + DocKey collection_key(PrimitiveValues("c")); + SetUpCollectionWithTTL(collection_key, UseIntermediateFlushes::kTrue); + AssertDocDbDebugDumpStrEq( + ExpectedDebugDumpForCollectionWithTTL(collection_key, InitMarkerExpired::kFalse)); + MinorCompaction( + HybridTime::FromMicros(1100 + 20 * 1000000 + 1), /* num_files_to_compact */ 2, + /* start_index */ 1); + + AssertDocDbDebugDumpStrEq(R"#( 
+SubDocKey(DocKey([], ["c"]), [HT{ physical: 1000 }]) -> {}; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k0"; HT{ physical: 1100 }]) -> DEL // file 8 +SubDocKey(DocKey([], ["c"]), ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k1"; HT{ physical: 1100 }]) -> "vv1"; ttl: 21.000s // file 8 +SubDocKey(DocKey([], ["c"]), ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k2"; HT{ physical: 1100 }]) -> "vv2"; ttl: 22.000s // file 4 +SubDocKey(DocKey([], ["c"]), ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k3"; HT{ physical: 1100 }]) -> "vv3"; ttl: 23.000s // file 5 +SubDocKey(DocKey([], ["c"]), ["k4"; HT{ physical: 1100 }]) -> "vv4"; ttl: 24.000s // file 6 +SubDocKey(DocKey([], ["c"]), ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s // file 7 + )#"); + + // Compact files 4, 5, 6, 7, 8. This should result in creation of a number of delete markers + // from expired entries. Some expired entries from the first file will stay. + MinorCompaction( + HybridTime::FromMicros(1100 + 24 * 1000000 + 1), /* num_files_to_compact */ 5, + /* start_index */ 1); + + AssertDocDbDebugDumpStrEq(R"#( +SubDocKey(DocKey([], ["c"]), [HT{ physical: 1000 }]) -> {}; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k0"; HT{ physical: 1100 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["c"]), ["k0"; HT{ physical: 1000 w: 1 }]) -> "v0"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k1"; HT{ physical: 1100 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["c"]), ["k1"; HT{ physical: 1000 w: 2 }]) -> "v1"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k2"; HT{ physical: 1100 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["c"]), ["k2"; HT{ physical: 1000 w: 3 }]) -> "v2"; ttl: 10.000s // file 1 +SubDocKey(DocKey([], ["c"]), ["k3"; HT{ physical: 1100 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["c"]), ["k4"; HT{ physical: 1100 }]) -> DEL // file 9 +SubDocKey(DocKey([], ["c"]), ["k5"; HT{ physical: 1100 }]) -> "vv5"; ttl: 25.000s // file 9 + )#"); + +} + } // namespace docdb } // namespace yb diff --git a/src/yb/docdb/docdb.cc b/src/yb/docdb/docdb.cc index a64be566a78e..c6468f8c2339 100644 --- a/src/yb/docdb/docdb.cc +++ b/src/yb/docdb/docdb.cc @@ -418,7 +418,7 @@ CHECKED_STATUS BuildSubDocument( key = key_copy.AsSlice(); rocksdb::Slice value = iter->value(); - // Checking that intent aware iterator returns entry with correct time. + // Checking that IntentAwareIterator returns an entry with correct time. DCHECK_GE(iter->read_time().global_limit, doc_ht.hybrid_time()) << "Found key: " << SubDocKey::DebugSliceToString(key); @@ -430,6 +430,7 @@ CHECKED_STATUS BuildSubDocument( Value doc_value; RETURN_NOT_OK(doc_value.Decode(value)); + ValueType value_type = doc_value.value_type(); if (key == data.subdocument_key) { const MonoDelta ttl = ComputeTTL(doc_value.ttl(), data.table_ttl); @@ -449,32 +450,28 @@ CHECKED_STATUS BuildSubDocument( expiry.ToDebugString(), low_ts.ToString()); } - // Treat the value as a tombstone written at expiry time. Note that this doesn't apply - // to init markers for collections since even if the init marker for the collection has - // expired, individual elements in the collection might still be valid. 
-      if (!IsCollectionType(doc_value.value_type())) {
-        doc_value = Value(PrimitiveValue::kTombstone);
-        // Use a write id that could never be used by a real operation within a single-shard
-        // txn, so that we don't split that operation into multiple parts.
-        write_time = DocHybridTime(expiry, kMaxWriteId);
-      }
+        // Treat an expired value as a tombstone written at the same time as the original value.
+        doc_value = Value::Tombstone();
+        value_type = ValueType::kTombstone;
       }
     }
+
+    const bool is_collection = IsCollectionType(value_type);
     // We have found some key that matches our entire subdocument_key, i.e. we didn't skip ahead
     // to a lower level key (with optional object init markers).
-    if (IsCollectionType(doc_value.value_type()) ||
-        doc_value.value_type() == ValueType::kTombstone) {
+    if (is_collection || value_type == ValueType::kTombstone) {
       if (low_ts < write_time) {
         low_ts = write_time;
       }
-      if (IsCollectionType(doc_value.value_type()) && !has_expired) {
-        *data.result = SubDocument(doc_value.value_type());
+      if (is_collection && !has_expired) {
+        *data.result = SubDocument(value_type);
       }
-      // If the low subkey cannot include the found key, we want to skip to the low subkey,
-      // but if it can, we want to seek to the next key. This prevents an infinite loop
-      // where the iterator keeps seeking to itself if the found key matches the low subkey.
-      if (IsObjectType(doc_value.value_type()) && !data.low_subkey->CanInclude(key)) {
+      // If the subkey lower bound filters out the key we found, we want to skip to the lower
+      // bound. If it does not, we want to seek to the next key. This prevents an infinite loop
+      // where the iterator keeps seeking to itself if the key we found matches the low subkey.
+      // TODO: why aren't we doing this for arrays?
+      if (IsObjectType(value_type) && !data.low_subkey->CanInclude(key)) {
        // Try to seek to the low_subkey for efficiency.
        SeekToLowerBound(*data.low_subkey, iter);
      } else {
@@ -483,9 +480,9 @@ CHECKED_STATUS BuildSubDocument(
      }
      continue;
    } else {
-      if (!IsPrimitiveValueType(doc_value.value_type())) {
+      if (!IsPrimitiveValueType(value_type)) {
        return STATUS_FORMAT(Corruption,
-            "Expected primitive value type, got $0", doc_value.value_type());
+            "Expected primitive value type, got $0", value_type);
      }
      DCHECK_GE(iter->read_time().global_limit, write_time.hybrid_time());
@@ -522,7 +519,8 @@
      }
      SubDocument descendant{PrimitiveValue(ValueType::kInvalidValueType)};
-      // TODO: what if found_key is the same as before? We'll get into an infinite recursion then.
+      // TODO: what if the key we found is the same as before?
+      // We'll get into an infinite recursion then.
      {
        IntentAwareIteratorPrefixScope prefix_scope(key, iter);
        RETURN_NOT_OK(BuildSubDocument(
@@ -650,8 +648,7 @@ yb::Status GetSubDocument(
      if (!decode_result) {
        break;
      }
-      RETURN_NOT_OK(db_iter->FindLastWriteTime(
-          key_slice, &max_deleted_ts, nullptr /* result_value */));
+      RETURN_NOT_OK(db_iter->FindLastWriteTime(key_slice, &max_deleted_ts));
      key_slice = Slice(key_slice.data(), temp_key.data() - key_slice.data());
    }
  }
@@ -661,14 +658,15 @@
  // Check for init-marker / tombstones at the top level, update max_deleted_ts.
Value doc_value = Value(PrimitiveValue(ValueType::kInvalidValueType)); RETURN_NOT_OK(db_iter->FindLastWriteTime(key_slice, &max_deleted_ts, &doc_value)); + const ValueType value_type = doc_value.value_type(); if (data.return_type_only) { - *data.doc_found = doc_value.value_type() != ValueType::kInvalidValueType; + *data.doc_found = value_type != ValueType::kInvalidValueType; // Check for ttl. if (*data.doc_found) { const MonoDelta ttl = ComputeTTL(doc_value.ttl(), data.table_ttl); DocHybridTime write_time(DocHybridTime::kMin); - RETURN_NOT_OK(db_iter->FindLastWriteTime(key_slice, &write_time, nullptr)); + RETURN_NOT_OK(db_iter->FindLastWriteTime(key_slice, &write_time)); if (write_time != DocHybridTime::kMin && !ttl.Equals(Value::kMaxTtl)) { const HybridTime expiry = server::HybridClock::AddPhysicalTimeToHybridTime(write_time.hybrid_time(), ttl); @@ -690,17 +688,20 @@ yb::Status GetSubDocument( IntentAwareIteratorPrefixScope prefix_scope(key_slice, db_iter); RETURN_NOT_OK(BuildSubDocument(db_iter, data, max_deleted_ts, &num_values_observed)); *data.doc_found = data.result->value_type() != ValueType::kInvalidValueType; - if (*data.doc_found && doc_value.value_type() == ValueType::kRedisSet) { - RETURN_NOT_OK(data.result->ConvertToRedisSet()); - } else if (*data.doc_found && doc_value.value_type() == ValueType::kRedisTS) { - RETURN_NOT_OK(data.result->ConvertToRedisTS()); - } else if (*data.doc_found && doc_value.value_type() == ValueType::kRedisSortedSet) { - RETURN_NOT_OK(data.result->ConvertToRedisSortedSet()); + if (*data.doc_found) { + if (value_type == ValueType::kRedisSet) { + RETURN_NOT_OK(data.result->ConvertToRedisSet()); + } else if (value_type == ValueType::kRedisTS) { + RETURN_NOT_OK(data.result->ConvertToRedisTS()); + } else if (value_type == ValueType::kRedisSortedSet) { + RETURN_NOT_OK(data.result->ConvertToRedisSortedSet()); + } + // TODO: Could also handle lists here. } - // TODO: Also could handle lists here. return Status::OK(); } + // For each subkey in the projection, build subdocument. *data.result = SubDocument(); for (const PrimitiveValue& subkey : *projection) { diff --git a/src/yb/docdb/docdb.h b/src/yb/docdb/docdb.h index 899abf2fc760..5f5af13f915a 100644 --- a/src/yb/docdb/docdb.h +++ b/src/yb/docdb/docdb.h @@ -418,7 +418,9 @@ YB_STRONGLY_TYPED_BOOL(IncludeBinary); // Reports all errors to the output stream and returns the status of the first failed operation, // if any. 
void DocDBDebugDump( - rocksdb::DB* rocksdb, std::ostream& out, IncludeBinary include_binary = IncludeBinary::kFalse); + rocksdb::DB* rocksdb, + std::ostream& out, + IncludeBinary include_binary = IncludeBinary::kFalse); std::string DocDBDebugDumpToStr( rocksdb::DB* rocksdb, IncludeBinary include_binary = IncludeBinary::kFalse); diff --git a/src/yb/docdb/docdb_compaction_filter.cc b/src/yb/docdb/docdb_compaction_filter.cc index 08b92e22deda..734fe781914c 100644 --- a/src/yb/docdb/docdb_compaction_filter.cc +++ b/src/yb/docdb/docdb_compaction_filter.cc @@ -38,10 +38,10 @@ namespace docdb { DocDBCompactionFilter::DocDBCompactionFilter(HybridTime history_cutoff, ColumnIdsPtr deleted_cols, - bool is_full_compaction, + bool is_major_compaction, MonoDelta table_ttl) : history_cutoff_(history_cutoff), - is_full_compaction_(is_full_compaction), + is_major_compaction_(is_major_compaction), is_first_key_value_(true), filter_usage_logged_(false), table_ttl_(table_ttl), @@ -56,20 +56,10 @@ bool DocDBCompactionFilter::Filter(int level, const rocksdb::Slice& existing_value, std::string* new_value, bool* value_changed) const { - if (!is_full_compaction_) { - // By default, we only perform history garbage collection on full compactions - // (or major compactions, in the HBase terminology). - // - // TODO: Enable history garbage collection on minor (non-full) compactions as well. - // This should be similar to the existing workflow, but should be extensively tested. - // - // Here, false means "keep the key/value pair" (don't filter it out). - return false; - } - if (!filter_usage_logged_) { // TODO: switch this to VLOG if it becomes too chatty. - LOG(INFO) << "DocDB compaction filter is being used"; + LOG(INFO) << "DocDB compaction filter is being used for a " + << (is_major_compaction_ ? "major" : "minor") << " compaction"; filter_usage_logged_ = true; } @@ -78,9 +68,9 @@ bool DocDBCompactionFilter::Filter(int level, // TODO: Find a better way for handling of data corruption encountered during compactions. const Status key_decode_status = subdoc_key.FullyDecodeFrom(key); CHECK(key_decode_status.ok()) - << "Error decoding a key during compaction: " << key_decode_status.ToString() << "\n" - << " Key (raw): " << FormatRocksDBSliceAsStr(key) << "\n" - << " Key (best-effort decoded): " << BestEffortDocDBKeyToStr(key); + << "Error decoding a key during compaction: " << key_decode_status.ToString() << "\n" + << " Key (raw): " << FormatRocksDBSliceAsStr(key) << "\n" + << " Key (best-effort decoded): " << BestEffortDocDBKeyToStr(key); if (is_first_key_value_) { CHECK_EQ(0, overwrite_ht_.size()); @@ -95,35 +85,40 @@ bool DocDBCompactionFilter::Filter(int level, const DocHybridTime& ht = subdoc_key.doc_hybrid_time(); - // We're comparing the hybrid_time in this key with the _previous_ stack top of overwrite_ht_, - // after truncating the previous hybrid_time to the number of components in the common prefix - // of previous and current key. + // We're comparing the hybrid time in this key with the stack top of overwrite_ht_ after + // truncating the stack to the number of components in the common prefix of previous and current + // key. 
// // Example (history_cutoff_ = 12): // -------------------------------------------------------------------------------------------- // Key overwrite_ht_ stack and relevant notes // -------------------------------------------------------------------------------------------- // k1 T10 [MinHT] + // // k1 T5 [T10] + // // k1 col1 T11 [T10, T11] + // // k1 col1 T7 The stack does not get truncated (shared prefix length is 2), so // prev_overwrite_ht = 11. Removing this entry because 7 < 11. + // The stack stays at [T10, T11]. + // // k1 col2 T9 Truncating the stack to [T10], setting prev_overwrite_ht to 10, and therefore // deciding to remove this entry because 9 < 10. // const DocHybridTime prev_overwrite_ht = - overwrite_ht_.empty() ? DocHybridTime::kMin : overwrite_ht_.back(); + overwrite_ht_.empty() ? DocHybridTime::kMin : overwrite_ht_.back(); // We only keep entries with hybrid_time equal to or later than the latest time the subdocument - // was fully overwritten or deleted prior to or at the history cutoff hybrid_time. The intuition - // is that key/value pairs that were overwritten at or before history cutoff time will not be - // visible at history cutoff time or any later time anyway. + // was fully overwritten or deleted prior to or at the history cutoff time. The intuition is that + // key/value pairs that were overwritten at or before history cutoff time will not be visible at + // history cutoff time or any later time anyway. // // Furthermore, we only need to update the overwrite hybrid_time stack in case we have decided to - // keep the new entry. Otherwise, the current entry's hybrid_time ts is less than the previous + // keep the new entry. Otherwise, the current entry's hybrid time ht is less than the previous // overwrite hybrid_time prev_overwrite_ht, and therefore it does not provide any new information - // about key/value pairs that follow being overwritten at a particular hybrid_time. Another way to - // explain this is to look at the logic that follows. If we don't early-exit here while ts is less + // about key/value pairs that follow being overwritten at a particular hybrid time. Another way to + // explain this is to look at the logic that follows. If we don't early-exit here while ht is less // than prev_overwrite_ht, we'll end up adding more prev_overwrite_ht values to the overwrite // hybrid_time stack, and we might as well do that while handling the next key/value pair that // does not get cleaned up the same way as this one. @@ -148,26 +143,31 @@ bool DocDBCompactionFilter::Filter(int level, const bool ht_at_or_below_cutoff = ht.hybrid_time() <= history_cutoff_; - // See if we found a higher hybrid_time not exceeding the history cutoff hybrid_time at which the + // See if we found a higher hybrid time not exceeding the history cutoff hybrid time at which the // subdocument (including a primitive value) rooted at the current key was fully overwritten. - // In case ts > history_cutoff_, we just keep the parent document's highest known overwrite - // hybrid_time that does not exceed the cutoff hybrid_time. In that case this entry is obviously + // In case of ht > history_cutoff_, we just keep the parent document's highest known overwrite + // hybrid time that does not exceed the cutoff hybrid time. In that case this entry is obviously // too new to be garbage-collected. overwrite_ht_.push_back(ht_at_or_below_cutoff ? 
max(prev_overwrite_ht, ht) : prev_overwrite_ht); CHECK_EQ(new_stack_size, overwrite_ht_.size()); - prev_subdoc_key_ = std::move(subdoc_key); - if (prev_subdoc_key_.num_subkeys() > 0 && - prev_subdoc_key_.subkeys()[0].value_type() == ValueType::kColumnId) { - // Column ID is first subkey in QL tables. - ColumnId col_id = prev_subdoc_key_.subkeys()[0].GetColumnId(); - - if (deleted_cols_->find(col_id) != deleted_cols_->end()) { + // Check for CQL columns deleted from the schema. This is done regardless of whether this is a + // major or minor compaction. + // + // TODO: could there be a case when there is still a read request running that uses an old schema, + // and we end up removing some data that the client expects to see? + if (subdoc_key.num_subkeys() > 0) { + const auto& first_subkey = subdoc_key.subkeys()[0]; + // Column ID is the first subkey in every CQL row. + if (first_subkey.value_type() == ValueType::kColumnId && + deleted_cols_->find(first_subkey.GetColumnId()) != deleted_cols_->end()) { return true; } } + prev_subdoc_key_ = std::move(subdoc_key); + ValueType value_type; CHECK_OK(Value::DecodePrimitiveValueType(existing_value, &value_type)); MonoDelta ttl; @@ -177,34 +177,35 @@ bool DocDBCompactionFilter::Filter(int level, bool has_expired = false; - CHECK_OK(HasExpiredTTL(subdoc_key.hybrid_time(), ComputeTTL(ttl, table_ttl_), history_cutoff_, - &has_expired)); + if (ht_at_or_below_cutoff) { + // Only check for expiration if the current hybrid time is at or below history cutoff. + // The key could not have possibly expired by history_cutoff_ otherwise. + CHECK_OK(HasExpiredTTL(subdoc_key.hybrid_time(), ComputeTTL(ttl, table_ttl_), history_cutoff_, + &has_expired)); + } // As of 02/2017, we don't have init markers for top level documents in QL. As a result, we can // compact away each column if it has expired, including the liveness system column. The init // markers in Redis wouldn't be affected since they don't have any TTL associated with them and - // the ttl would default to kMaxTtl which would make has_expired false. + // the TTL would default to kMaxTtl which would make has_expired false. if (has_expired) { // This is consistent with the condition we're testing for deletes at the bottom of the function - // because ts <= history_cutoff_ is implied by has_expired. - if (is_full_compaction_) { + // because ht_at_or_below_cutoff is implied by has_expired. + if (is_major_compaction_) { return true; } + // During minor compactions, expired values are written back as tombstones because removing the - // record might expose earlier values which would be incorrect. Note that this doesn't apply - // to init markers for collections since even if the init marker for the collection has - // expired, individual elements in the collection might still be valid. - if (!IsCollectionType(value_type)) { - *value_changed = true; - *new_value = Value(PrimitiveValue::kTombstone).Encode(); - } + // record might expose earlier values which would be incorrect. + *value_changed = true; + *new_value = Value::EncodedTombstone(); } - // Deletes at or below the history cutoff hybrid_time can always be cleaned up on full (major) - // compactions. However, we do need to update the overwrite hybrid_time stack in this case (as we + // Tombstones at or below the history cutoff hybrid_time can always be cleaned up on full (major) + // compactions. 
However, we do need to update the overwrite hybrid time stack in this case (as we // just did), because this deletion (tombstone) entry might be the only reason for cleaning up - // more entries appearing at earlier hybrid_times. - return value_type == ValueType::kTombstone && ht_at_or_below_cutoff && is_full_compaction_; + // more entries appearing at earlier hybrid times. + return value_type == ValueType::kTombstone && ht_at_or_below_cutoff && is_major_compaction_; } const char* DocDBCompactionFilter::Name() const { diff --git a/src/yb/docdb/docdb_compaction_filter.h b/src/yb/docdb/docdb_compaction_filter.h index cace77f32dba..b5bc4006acb1 100644 --- a/src/yb/docdb/docdb_compaction_filter.h +++ b/src/yb/docdb/docdb_compaction_filter.h @@ -31,7 +31,7 @@ class DocDBCompactionFilter : public rocksdb::CompactionFilter { public: DocDBCompactionFilter(HybridTime history_cutoff, ColumnIdsPtr deleted_cols, - bool is_full_compaction, + bool is_major_compaction, MonoDelta table_ttl); ~DocDBCompactionFilter() override; @@ -45,11 +45,11 @@ class DocDBCompactionFilter : public rocksdb::CompactionFilter { private: // We will not keep history below this hybrid_time. The view of the database at this hybrid_time // is preserved, but after the compaction completes, we should not expect to be able to do - // consistent scans at DocDB hybrid_times lower than this. Those scans will result in missing + // consistent scans at DocDB hybrid times lower than this. Those scans will result in missing // data. Therefore, it is really important to always set this to a value lower than or equal to // the lowest "read point" of any pending read operations. const HybridTime history_cutoff_; - const bool is_full_compaction_; + const bool is_major_compaction_; mutable bool is_first_key_value_; mutable SubDocKey prev_subdoc_key_; @@ -72,7 +72,7 @@ class DocDBCompactionFilter : public rocksdb::CompactionFilter { // doc_key1 HT(20) -> DEL | [20] | 20 >= MinHT, keeping the entry // | | ^^ ^^^^^ // | Note: we're comparing the hybrid_time in this key with - // | the _previous_ stack top of overwrite_ht_. + // | the previous stack top of overwrite_ht_. 
// | | // doc_key1 HT(10) -> {} | [20] | 10 < 20, deleting the entry // doc_key1 subkey1 HT(35) -> "value4" | [20, 20] | 35 >= 20, keeping the entry diff --git a/src/yb/docdb/docdb_test_util.cc b/src/yb/docdb/docdb_test_util.cc index 7b18a5ad73f4..5d804e53dbe2 100644 --- a/src/yb/docdb/docdb_test_util.cc +++ b/src/yb/docdb/docdb_test_util.cc @@ -17,6 +17,8 @@ #include #include +#include <boost/scope_exit.hpp> + #include "yb/rocksdb/table.h" #include "yb/rocksdb/util/statistics.h" @@ -36,7 +38,10 @@ #include "yb/util/string_trim.h" #include "yb/util/test_macros.h" #include "yb/util/test_util.h" +#include "yb/util/tostring.h" +#include "yb/util/algorithm_util.h" #include "yb/tablet/tablet_options.h" +#include "yb/rocksdb/db/filename.h" using std::endl; using std::make_shared; @@ -49,6 +54,9 @@ using strings::Substitute; using yb::util::ApplyEagerLineContinuation; using yb::util::FormatBytesAsStr; +using yb::util::TrimStr; +using yb::util::LeftShiftTextBlock; +using yb::util::TrimCppComments; namespace yb { namespace docdb { @@ -262,8 +270,7 @@ void LogicalRocksDBDebugSnapshot::Capture(rocksdb::DB* rocksdb) { auto iter = unique_ptr<rocksdb::Iterator>(rocksdb->NewIterator(read_options)); iter->SeekToFirst(); while (iter->Valid()) { - kvs.emplace_back(iter->key().ToString(/* hex = */ false), - iter->value().ToString(/* hex = */ false)); + kvs.emplace_back(iter->key().ToBuffer(), iter->value().ToBuffer()); iter->Next(); } // Save the DocDB debug dump as a string so we can check that we've properly restored the snapshot @@ -392,7 +399,7 @@ void DocDBLoadGenerator::PerformOperation(bool compact_history) { if (do_compaction_now) { // This will happen between the two iterations of the loop. If compact_history is false, // there is only one iteration and the compaction does not happen. - fixture_->CompactHistoryBefore(hybrid_time); + fixture_->FullyCompactHistoryBefore(hybrid_time); } SubDocKey sub_doc_key(doc_key); SubDocument doc_from_rocksdb; @@ -435,7 +442,7 @@ HybridTime DocDBLoadGenerator::last_operation_ht() const { void DocDBLoadGenerator::FlushRocksDB() { LOG(INFO) << "Forcing a RocksDB flush after hybrid_time " << last_operation_ht().value(); - ASSERT_OK(fixture_->FlushRocksDB()); + ASSERT_OK(fixture_->FlushRocksDbAndWait()); } void DocDBLoadGenerator::CaptureDocDbSnapshot() { @@ -558,15 +565,119 @@ TransactionOperationContextOpt DocDBLoadGenerator::GetReadOperationTransactionCo // ------------------------------------------------------------------------------------------------ void DocDBRocksDBFixture::AssertDocDbDebugDumpStrEq(const string &expected) { - ASSERT_STR_EQ_VERBOSE_TRIMMED(ApplyEagerLineContinuation(expected), DocDBDebugDumpToStr()); + const string debug_dump_str = TrimDocDbDebugDumpStr(DocDBDebugDumpToStr()); + const string expected_str = TrimDocDbDebugDumpStr(expected); + if (expected_str != debug_dump_str) { + LOG(INFO) << "Assertion failure" + << "\nExpected DocDB contents:\n\n" << expected_str << "\n" + << "\nActual DocDB contents:\n\n" << debug_dump_str << "\n"; + FAIL(); + } } -void DocDBRocksDBFixture::CompactHistoryBefore(HybridTime history_cutoff) { - LOG(INFO) << "Compacting history before hybrid_time " << history_cutoff.ToDebugString(); +void DocDBRocksDBFixture::FullyCompactHistoryBefore(HybridTime history_cutoff) { + LOG(INFO) << "Major-compacting history before hybrid_time " << history_cutoff; SetHistoryCutoffHybridTime(history_cutoff); - ASSERT_OK(FlushRocksDB()); + BOOST_SCOPE_EXIT(this_) { + this_->SetHistoryCutoffHybridTime(HybridTime::kMin); + } BOOST_SCOPE_EXIT_END; + +
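+ // The BOOST_SCOPE_EXIT block above restores the history cutoff to HybridTime::kMin on every + // exit path, including a failed assertion below, so an aborted test does not leave the fixture + // garbage-collecting history on later flushes and compactions. +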
ASSERT_OK(FlushRocksDbAndWait()); ASSERT_OK(FullyCompactDB(rocksdb_.get())); - SetHistoryCutoffHybridTime(HybridTime::kMin); +} + +void DocDBRocksDBFixture::MinorCompaction( + HybridTime history_cutoff, + int num_files_to_compact, + int start_index) { + + ASSERT_OK(FlushRocksDbAndWait()); + SetHistoryCutoffHybridTime(history_cutoff); + BOOST_SCOPE_EXIT(this_) { + this_->SetHistoryCutoffHybridTime(HybridTime::kMin); + } BOOST_SCOPE_EXIT_END; + + rocksdb::ColumnFamilyMetaData cf_meta; + rocksdb_->GetColumnFamilyMetaData(&cf_meta); + + vector<string> compaction_input_file_names; + vector<string> remaining_file_names; + + size_t initial_num_files = 0; + { + const auto& files = cf_meta.levels[0].files; + initial_num_files = files.size(); + ASSERT_LE(num_files_to_compact, files.size()); + vector<string> file_names; + for (const auto& sst_meta : files) { + file_names.push_back(sst_meta.name); + } + SortByKey(file_names.begin(), file_names.end(), rocksdb::TableFileNameToNumber); + + if (start_index < 0) { + start_index = file_names.size() - num_files_to_compact; + } + + for (int i = 0; i < file_names.size(); ++i) { + if (start_index <= i && compaction_input_file_names.size() < num_files_to_compact) { + compaction_input_file_names.push_back(file_names[i]); + } else { + remaining_file_names.push_back(file_names[i]); + } + } + ASSERT_EQ(num_files_to_compact, compaction_input_file_names.size()) + << "Tried to add " << num_files_to_compact << " files starting with index " << start_index + << ", ended up adding " << yb::ToString(compaction_input_file_names) + << " and leaving " << yb::ToString(remaining_file_names) << " out. All files: " + << yb::ToString(file_names); + + LOG(INFO) << "Minor-compacting history before hybrid_time " << history_cutoff << ":\n" + << " files being compacted: " << yb::ToString(compaction_input_file_names) << "\n" + << " other files: " << yb::ToString(remaining_file_names); + + ASSERT_OK(rocksdb_->CompactFiles( + rocksdb::CompactionOptions(), + compaction_input_file_names, + /* output_level */ 0)); + const auto sstables_after_compaction = SSTableFileNames(); + LOG(INFO) << "SSTable files after compaction: " << sstables_after_compaction.size() + << " (" << yb::ToString(sstables_after_compaction) << ")"; + for (const auto& remaining_file : remaining_file_names) { + ASSERT_TRUE( + std::find(sstables_after_compaction.begin(), sstables_after_compaction.end(), + remaining_file) != sstables_after_compaction.end() + ) << "File " << remaining_file << " not found in file list after compaction: " + << yb::ToString(sstables_after_compaction) << ", even though none of these files were " + << "supposed to be compacted: " << yb::ToString(remaining_file_names); + } + } + + rocksdb_->GetColumnFamilyMetaData(&cf_meta); + vector<string> files_after_compaction; + for (const auto& sst_meta : cf_meta.levels[0].files) { + files_after_compaction.push_back(sst_meta.name); + } + const int64_t expected_resulting_num_files = initial_num_files - num_files_to_compact + 1; + ASSERT_EQ(expected_resulting_num_files, + static_cast<int64_t>(cf_meta.levels[0].files.size())) + << "Files after compaction: " << yb::ToString(files_after_compaction); +} + +int DocDBRocksDBFixture::NumSSTableFiles() { + rocksdb::ColumnFamilyMetaData cf_meta; + rocksdb_->GetColumnFamilyMetaData(&cf_meta); + return cf_meta.levels[0].files.size(); +} + +StringVector DocDBRocksDBFixture::SSTableFileNames() { + rocksdb::ColumnFamilyMetaData cf_meta; + rocksdb_->GetColumnFamilyMetaData(&cf_meta); + StringVector files; + for (const auto& sstable_meta : cf_meta.levels[0].files) {
files.push_back(sstable_meta.name); + } + SortByKey(files.begin(), files.end(), rocksdb::TableFileNameToNumber); + return files; } Status DocDBRocksDBFixture::FormatDocWriteBatch(const DocWriteBatch &dwb, string* dwb_str) { @@ -598,7 +709,12 @@ string DocDBRocksDBFixture::tablet_id() { } Status DocDBRocksDBFixture::InitRocksDBOptions() { - return InitCommonRocksDBOptions(); + RETURN_NOT_OK(InitCommonRocksDBOptions()); + return Status::OK(); +} + +string TrimDocDbDebugDumpStr(const string& debug_dump_str) { + return TrimStr(ApplyEagerLineContinuation(LeftShiftTextBlock(TrimCppComments(debug_dump_str)))); } } // namespace docdb diff --git a/src/yb/docdb/docdb_test_util.h b/src/yb/docdb/docdb_test_util.h index ffd65817900e..09811cd6fb9a 100644 --- a/src/yb/docdb/docdb_test_util.h +++ b/src/yb/docdb/docdb_test_util.h @@ -99,7 +99,16 @@ class LogicalRocksDBDebugSnapshot { class DocDBRocksDBFixture : public DocDBRocksDBUtil { public: void AssertDocDbDebugDumpStrEq(const string &expected); - void CompactHistoryBefore(HybridTime history_cutoff); + void FullyCompactHistoryBefore(HybridTime history_cutoff); + + // num_files_to_compact - number of files that should participate in the minor compaction + // start_index - the index of the file to start with (0 = the oldest file, -1 = compact + // num_files_to_compact newest files). + void MinorCompaction(HybridTime history_cutoff, int num_files_to_compact, int start_index = -1); + + int NumSSTableFiles(); + StringVector SSTableFileNames(); + CHECKED_STATUS InitRocksDBDir() override; CHECKED_STATUS InitRocksDBOptions() override; TabletId tablet_id() override; @@ -229,6 +238,10 @@ class DocDBLoadGenerator { TransactionOperationContextOpt GetReadOperationTransactionContext(); }; +// Used for pre-processing multi-line DocDB debug dump strings in tests. Removes common indentation +// and C++-style comments and applies backslash line continuation. +string TrimDocDbDebugDumpStr(const string& debug_dump); + #define ASSERT_DOCDB_DEBUG_DUMP_STR_EQ(expected) \ do { \ ASSERT_STR_EQ_VERBOSE_TRIMMED( \ diff --git a/src/yb/docdb/docdb_util.cc b/src/yb/docdb/docdb_util.cc index 816dd18e71de..4054c5d31a5c 100644 --- a/src/yb/docdb/docdb_util.cc +++ b/src/yb/docdb/docdb_util.cc @@ -294,8 +294,9 @@ void DocDBRocksDBUtil::DocDBDebugDumpToConsole() { DocDBDebugDump(rocksdb_.get(), std::cerr); } -Status DocDBRocksDBUtil::FlushRocksDB() { +Status DocDBRocksDBUtil::FlushRocksDbAndWait() { rocksdb::FlushOptions flush_options; + flush_options.wait = true; return rocksdb()->Flush(flush_options); } diff --git a/src/yb/docdb/docdb_util.h b/src/yb/docdb/docdb_util.h index 91b43312624a..edafd7be8b51 100644 --- a/src/yb/docdb/docdb_util.h +++ b/src/yb/docdb/docdb_util.h @@ -140,7 +140,7 @@ class DocDBRocksDBUtil { void DocDBDebugDumpToConsole(); - CHECKED_STATUS FlushRocksDB(); + CHECKED_STATUS FlushRocksDbAndWait(); void SetTableTTL(uint64_t ttl_msec); diff --git a/src/yb/docdb/intent_aware_iterator.h b/src/yb/docdb/intent_aware_iterator.h index 288927f2ae58..911ede38176f 100644 --- a/src/yb/docdb/intent_aware_iterator.h +++ b/src/yb/docdb/intent_aware_iterator.h @@ -137,7 +137,7 @@ class IntentAwareIterator { CHECKED_STATUS FindLastWriteTime( const Slice& key_without_ht, DocHybridTime* max_deleted_ts, - Value* result_value); + Value* result_value = nullptr); private: // Seek forward on regular sub-iterator. 
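(Illustrative sketch, not part of the change: one way the fixture helpers above might be combined in a test. The test and fixture names, key layout, hybrid times, and file counts here are assumptions; SetPrimitive, FlushRocksDbAndWait, NumSSTableFiles, MinorCompaction, and the _usec_ht literal are the pieces introduced or renamed in this diff.)

TEST_F(DocDBRocksDBFixture, MinorCompactionSketch) {
  const DocKey doc_key(PrimitiveValues("mydockey"));
  const KeyBytes encoded_doc_key(doc_key.Encode());
  // Each write followed by a flush produces one SST file, giving three files total.
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(SetPrimitive(
        DocPath(encoded_doc_key, PrimitiveValue("subkey")),
        Value(PrimitiveValue(i)),
        HybridTime::FromMicros(1000 * (i + 1))));
    ASSERT_OK(FlushRocksDbAndWait());
  }
  ASSERT_EQ(3, NumSSTableFiles());
  // Minor-compact the two newest files (start_index defaults to -1). Versions overwritten at or
  // below the cutoff can be dropped here, but tombstones and TTL-expired values are only removed
  // for good by FullyCompactHistoryBefore (a major compaction).
  MinorCompaction(4000_usec_ht, /* num_files_to_compact = */ 2);
  ASSERT_EQ(2, NumSSTableFiles());
}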
diff --git a/src/yb/docdb/primitive_value.h b/src/yb/docdb/primitive_value.h index 29da2a5cd0dc..a72bc3f9c1ff 100644 --- a/src/yb/docdb/primitive_value.h +++ b/src/yb/docdb/primitive_value.h @@ -32,6 +32,7 @@ #include "yb/docdb/value_type.h" #include "yb/util/decimal.h" #include "yb/util/timestamp.h" +#include "yb/util/algorithm_util.h" namespace yb { namespace docdb { @@ -43,11 +44,6 @@ enum class SystemColumnIds : ColumnIdRep { kLivenessColumn = 0 // Stores the TTL for QL rows inserted using an INSERT statement. }; -enum class SortOrder : int8_t { - kAscending = 0, - kDescending -}; - class PrimitiveValue { public: static const PrimitiveValue kInvalidPrimitiveValue; @@ -495,7 +491,7 @@ template <class T, class... U> inline void AppendPrimitiveValues(std::vector<PrimitiveValue>* dest, T first_arg, U... more_args) { - dest->push_back(PrimitiveValue(first_arg)); + dest->emplace_back(first_arg); AppendPrimitiveValues(dest, more_args...); } diff --git a/src/yb/docdb/randomized_docdb-test.cc b/src/yb/docdb/randomized_docdb-test.cc index 09629bb53329..c29522df32bb 100644 --- a/src/yb/docdb/randomized_docdb-test.cc +++ b/src/yb/docdb/randomized_docdb-test.cc @@ -123,7 +123,7 @@ void RandomizedDocDBTest::RunWorkloadWithSnaphots(bool enable_history_cleanup) { } ASSERT_NO_FATALS(load_gen_->PerformOperation()) << "at iteration " << current_iteration; if (current_iteration % kFlushFrequency == 0) { - ASSERT_OK(FlushRocksDB()); + ASSERT_OK(FlushRocksDbAndWait()); } if (current_iteration % snapshot_frequency == 0) { load_gen_->CaptureDocDbSnapshot(); @@ -140,7 +140,7 @@ void RandomizedDocDBTest::RunWorkloadWithSnaphots(bool enable_history_cleanup) { // We are performing cleanup at an old hybrid_time, and don't expect it to have any effect. InMemDocDbState snapshot_before_cleanup; snapshot_before_cleanup.CaptureAt(rocksdb(), HybridTime::kMax); - ASSERT_NO_FATALS(CompactHistoryBefore(cleanup_ht)); + ASSERT_NO_FATALS(FullyCompactHistoryBefore(cleanup_ht)); InMemDocDbState snapshot_after_cleanup; snapshot_after_cleanup.CaptureAt(rocksdb(), HybridTime::kMax); @@ -149,7 +149,7 @@ void RandomizedDocDBTest::RunWorkloadWithSnaphots(bool enable_history_cleanup) { max_history_cleanup_ht = cleanup_ht; cleanup_ht_and_iteration.emplace_back(cleanup_ht.value(), load_gen_->last_operation_ht().value()); - ASSERT_NO_FATALS(CompactHistoryBefore(cleanup_ht)); + ASSERT_NO_FATALS(FullyCompactHistoryBefore(cleanup_ht)); // We expect some snapshots at hybrid_times earlier than cleanup_ht to no longer be // recoverable. diff --git a/src/yb/docdb/value.cc b/src/yb/docdb/value.cc index b2bed9307ec0..aab580dcd626 100644 --- a/src/yb/docdb/value.cc +++ b/src/yb/docdb/value.cc @@ -140,5 +140,15 @@ Status Value::DecodePrimitiveValueType(const rocksdb::Slice& rocksdb_value, return Status::OK(); } +const Value& Value::Tombstone() { + static const auto kTombstone = Value(PrimitiveValue::kTombstone); + return kTombstone; +} + +const string& Value::EncodedTombstone() { + static const string kEncodedTombstone = Tombstone().Encode(); + return kEncodedTombstone; +} + } // namespace docdb } // namespace yb diff --git a/src/yb/docdb/value.h b/src/yb/docdb/value.h index ac12b2e88fe3..22e28c692ab6 100644 --- a/src/yb/docdb/value.h +++ b/src/yb/docdb/value.h @@ -76,14 +76,17 @@ class Value { void EncodeAndAppend(std::string* value_bytes) const; - // Decodes the ValueType of the primitive value stored in the given rocksdb_value. + // Decodes the ValueType of the primitive value stored in the given RocksDB value.
static CHECKED_STATUS DecodePrimitiveValueType(const rocksdb::Slice& rocksdb_value, ValueType* value_type); - // Return the user timestamp portion from a slice that points to the rocksdb_value. + // Return the user timestamp portion from a slice that points to the RocksDB value. static CHECKED_STATUS DecodeUserTimestamp(const rocksdb::Slice& rocksdb_value, UserTimeMicros* user_timestamp); + static const Value& Tombstone(); + static const std::string& EncodedTombstone(); + private: // Consume the timestamp portion of the slice assuming the beginning of the slice points to // the timestamp. diff --git a/src/yb/rocksdb/db/db_compaction_test.cc b/src/yb/rocksdb/db/db_compaction_test.cc index 081a5a5ad6f7..068e536d1f5a 100644 --- a/src/yb/rocksdb/db/db_compaction_test.cc +++ b/src/yb/rocksdb/db/db_compaction_test.cc @@ -2206,7 +2206,7 @@ TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) { } while (ChangeCompactOptions()); } -// Check level comapction with compact files +// Check level compaction with compact files TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) { const int kTestKeySize = 16; const int kTestValueSize = 984; diff --git a/src/yb/tools/yb-bulk_load.cc b/src/yb/tools/yb-bulk_load.cc index e4665fb0ea6b..f9ea438cea76 100644 --- a/src/yb/tools/yb-bulk_load.cc +++ b/src/yb/tools/yb-bulk_load.cc @@ -180,7 +180,7 @@ void BulkLoadTask::Run() { /* decode_dockey */ false, /* increment_write_id */ false)); if (FLAGS_flush_batch_for_tests) { - CHECK_OK(db_fixture_->FlushRocksDB()); + CHECK_OK(db_fixture_->FlushRocksDbAndWait()); } } @@ -372,7 +372,7 @@ Status BulkLoad::FinishTabletProcessing(const TabletId &tablet_id, thread_pool_->Wait(); // Now flush the DB. - RETURN_NOT_OK(db_fixture_->FlushRocksDB()); + RETURN_NOT_OK(db_fixture_->FlushRocksDbAndWait()); // Perform the necessary compactions. RETURN_NOT_OK(CompactFiles()); diff --git a/src/yb/util/algorithm_util.h b/src/yb/util/algorithm_util.h new file mode 100644 index 000000000000..aaa7fdecfb5b --- /dev/null +++ b/src/yb/util/algorithm_util.h @@ -0,0 +1,42 @@ +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. 
+// + +#ifndef YB_UTIL_ALGORITHM_UTIL_H +#define YB_UTIL_ALGORITHM_UTIL_H + +#include <algorithm> + +#include "yb/util/enums.h" + +namespace yb { + +enum class SortOrder : uint8_t { + kAscending = 0, + kDescending +}; + +template <class Iterator, class Functor> +void SortByKey(Iterator begin, + Iterator end, + const Functor& f, + SortOrder sort_order = SortOrder::kAscending) { + using Value = typename Iterator::value_type; + const bool invert_order = sort_order == SortOrder::kDescending; + std::sort(begin, end, [invert_order, &f](const Value& a, const Value& b) { + // Swap the operands for descending order instead of negating the result, so the comparator + // stays a strict weak ordering even when two keys are equal. + return invert_order ? f(b) < f(a) : f(a) < f(b); + }); +} + +} // namespace yb + +#endif // YB_UTIL_ALGORITHM_UTIL_H diff --git a/src/yb/util/split.h b/src/yb/util/split.h index 5a33df2ecb1f..0cd5195a1cec 100644 --- a/src/yb/util/split.h +++ b/src/yb/util/split.h @@ -9,7 +9,6 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations // under the License. -// #ifndef YB_UTIL_SPLIT_H_ #define YB_UTIL_SPLIT_H_ diff --git a/src/yb/util/string_trim-test.cc b/src/yb/util/string_trim-test.cc index 92f8b2f63835..730bbdc993d0 100644 --- a/src/yb/util/string_trim-test.cc +++ b/src/yb/util/string_trim-test.cc @@ -53,5 +53,20 @@ This is my )#")); } +TEST(StringTrimTest, TestTrimCppComments) { + ASSERT_EQ( + R"#( +Line1 + +Line2 +)#", + TrimCppComments(R"#( +Line1 // This is a comment + +Line2 // This is a comment too +)#") + ); +} + } // namespace util } // namespace yb diff --git a/src/yb/util/string_trim.cc b/src/yb/util/string_trim.cc index 64d7dd4e14c1..424c78559767 100644 --- a/src/yb/util/string_trim.cc +++ b/src/yb/util/string_trim.cc @@ -19,6 +19,7 @@ #include #include #include +#include <regex> using std::string; using std::vector; @@ -101,5 +102,10 @@ string LeftShiftTextBlock(const std::string& s) { return result; } +std::string TrimCppComments(const std::string& s) { + static const std::regex kCppCommentRE = std::regex("\\s*//[^\n]+"); + return std::regex_replace(s, kCppCommentRE, ""); +} + } // namespace util } // namespace yb diff --git a/src/yb/util/string_trim.h b/src/yb/util/string_trim.h index afee1a8d00a4..905011a28b4f 100644 --- a/src/yb/util/string_trim.h +++ b/src/yb/util/string_trim.h @@ -57,6 +57,10 @@ std::string LeftShiftTextBlock(const std::string& s); // respecting the maximum line length rule in expected test output specified using raw literals. std::string ApplyEagerLineContinuation(const std::string& s); +// Trim C++-style comments from each line of the given string. Used to allow including comments +// inside DocDB debug dump string constants. +std::string TrimCppComments(const std::string& s); + } // namespace util } // namespace yb diff --git a/src/yb/util/string_util.h b/src/yb/util/string_util.h index 2232a2a192d7..896bf72e44cd 100644 --- a/src/yb/util/string_util.h +++ b/src/yb/util/string_util.h @@ -32,7 +32,8 @@ namespace yb { -std::vector<std::string> StringSplit(const std::string& arg, char delim); +using StringVector = std::vector<std::string>; +StringVector StringSplit(const std::string& arg, char delim); template <typename T> inline std::string VectorToString(const std::vector<T>& vec) { diff --git a/yb_build.sh b/yb_build.sh index 09b538b63f1f..f7160a33b76a 100755 --- a/yb_build.sh +++ b/yb_build.sh @@ -393,8 +393,11 @@ run_cxx_test() { # summary information is displayed. This option will show all test output. # --output-on-failure is unnecessary when --verbose is specified.
In fact, adding # --output-on-failure will result in duplicate output in case of a failure. + # + # In this verbose mode, ctest also prefixes every line of the output with a number (the test + # number?) followed by a colon. We filter that prefix out. set -x - ctest --verbose -R ^"$cxx_test_name"$ + ctest --verbose -R ^"$cxx_test_name"$ 2>&1 | sed 's/^[0-9][0-9]*: //g' ) else (