diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index d7d57bbf519b..1832f1303191 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -465,6 +465,11 @@ bool Compaction::IsTrivialMove() const { return false; } + if (compaction_reason_ == CompactionReason::kChangeTemperature) { + // Changing temperature usually requires rewriting the file. + return false; + } + // Used in universal compaction, where trivial move can be done if the // input files are non overlapping if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) && diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 1b01adbfc334..333d66feffa7 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -16,6 +16,7 @@ #include "db/column_family.h" #include "logging/log_buffer.h" #include "logging/logging.h" +#include "options/options_helper.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -284,15 +285,18 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction( return c; } -Compaction* FIFOCompactionPicker::PickCompactionToWarm( +Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer) { - if (mutable_cf_options.compaction_options_fifo.age_for_warm == 0) { + const std::vector& ages = + mutable_cf_options.compaction_options_fifo + .file_temperature_age_thresholds; + if (ages.empty()) { return nullptr; } - // PickCompactionToWarm is only triggered if there is no non-L0 files. + // Does not apply to multi-level FIFO. for (int level = 1; level < vstorage->num_levels(); ++level) { if (GetTotalFilesSize(vstorage->LevelFiles(level)) > 0) { return nullptr; @@ -301,14 +305,18 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm( const int kLevel0 = 0; const std::vector& level_files = vstorage->LevelFiles(kLevel0); + if (level_files.empty()) { + return nullptr; + } int64_t _current_time; auto status = ioptions_.clock->GetCurrentTime(&_current_time); if (!status.ok()) { - ROCKS_LOG_BUFFER(log_buffer, - "[%s] FIFO compaction: Couldn't get current time: %s. " - "Not doing compactions based on warm threshold. ", - cf_name.c_str(), status.ToString().c_str()); + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: Couldn't get current time: %s. " + "Not doing compactions based on file temperature-age threshold. ", + cf_name.c_str(), status.ToString().c_str()); return nullptr; } const uint64_t current_time = static_cast(_current_time); @@ -327,56 +335,77 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm( inputs[0].level = 0; // avoid underflow - if (current_time > mutable_cf_options.compaction_options_fifo.age_for_warm) { - uint64_t create_time_threshold = - current_time - mutable_cf_options.compaction_options_fifo.age_for_warm; + uint64_t min_age = ages[0].age; + // kLastTemperature means target temperature is to be determined. + Temperature compaction_target_temp = Temperature::kLastTemperature; + if (current_time > min_age) { + uint64_t create_time_threshold = current_time - min_age; uint64_t compaction_size = 0; - // We will ideally identify a file qualifying for warm tier by knowing - // the timestamp for the youngest entry in the file. However, right now - // we don't have the information. We infer it by looking at timestamp - // of the next file's (which is just younger) oldest entry's timestamp. - FileMetaData* prev_file = nullptr; - for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { - FileMetaData* f = *ritr; - assert(f); - if (f->being_compacted) { - // Right now this probably won't happen as we never try to schedule - // two compactions in parallel, so here we just simply don't schedule - // anything. + // We will ideally identify a file qualifying for temperature change by + // knowing the timestamp for the youngest entry in the file. However, right + // now we don't have the information. We infer it by looking at timestamp of + // the previous file's (which is just younger) oldest entry's timestamp. + Temperature cur_target_temp; + // avoid index underflow + assert(level_files.size() >= 1); + for (size_t index = level_files.size() - 1; index >= 1; --index) { + // Try to add cur_file to compaction inputs. + FileMetaData* cur_file = level_files[index]; + // prev_file is just younger than cur_file + FileMetaData* prev_file = level_files[index - 1]; + if (cur_file->being_compacted) { + // Should not happen since we check for + // `level0_compactions_in_progress_` above. Here we simply just don't + // schedule anything. return nullptr; } - uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); - if (oldest_ancester_time == kUnknownOldestAncesterTime) { + uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); + if (oldest_ancestor_time == kUnknownOldestAncesterTime) { // Older files might not have enough information. It is possible to // handle these files by looking at newer files, but maintaining the // logic isn't worth it. break; } - if (oldest_ancester_time > create_time_threshold) { - // The previous file (which has slightly older data) doesn't qualify - // for warm tier. + if (oldest_ancestor_time > create_time_threshold) { + // cur_file is too fresh break; } - if (prev_file != nullptr) { - compaction_size += prev_file->fd.GetFileSize(); - if (compaction_size > mutable_cf_options.max_compaction_bytes) { + cur_target_temp = ages[0].temperature; + for (size_t i = 1; i < ages.size(); ++i) { + if (current_time >= ages[i].age && + oldest_ancestor_time <= current_time - ages[i].age) { + cur_target_temp = ages[i].temperature; + } + } + if (cur_file->temperature == cur_target_temp) { + if (inputs[0].empty()) { + continue; + } else { break; } - inputs[0].files.push_back(prev_file); - ROCKS_LOG_BUFFER(log_buffer, - "[%s] FIFO compaction: picking file %" PRIu64 - " with next file's oldest time %" PRIu64 " for warm", - cf_name.c_str(), prev_file->fd.GetNumber(), - oldest_ancester_time); } - if (f->temperature == Temperature::kUnknown || - f->temperature == Temperature::kHot) { - prev_file = f; - } else if (!inputs[0].files.empty()) { - // A warm file newer than files picked. + + // cur_file needs to change temperature + if (compaction_target_temp == Temperature::kLastTemperature) { + assert(inputs[0].empty()); + compaction_target_temp = cur_target_temp; + } else if (cur_target_temp != compaction_target_temp) { + assert(!inputs[0].empty()); + break; + } + if (inputs[0].empty() || compaction_size + cur_file->fd.GetFileSize() <= + mutable_cf_options.max_compaction_bytes) { + inputs[0].files.push_back(cur_file); + compaction_size += cur_file->fd.GetFileSize(); + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with next file's oldest time %" PRIu64 " for temperature %s.", + cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time, + temperature_to_string[cur_target_temp].c_str()); + } + if (compaction_size > mutable_cf_options.max_compaction_bytes) { break; - } else { - assert(prev_file == nullptr); } } } @@ -390,7 +419,7 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm( std::move(inputs), 0, 0 /* output file size limit */, 0 /* max compaction bytes, not applicable */, 0 /* output path ID */, mutable_cf_options.compression, mutable_cf_options.compression_opts, - Temperature::kWarm, + compaction_target_temp, /* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "", vstorage->CompactionScore(0), /* is deletion compaction */ false, /* l0_files_might_overlap */ true, @@ -412,8 +441,8 @@ Compaction* FIFOCompactionPicker::PickCompaction( vstorage, log_buffer); } if (c == nullptr) { - c = PickCompactionToWarm(cf_name, mutable_cf_options, mutable_db_options, - vstorage, log_buffer); + c = PickTemperatureChangeCompaction( + cf_name, mutable_cf_options, mutable_db_options, vstorage, log_buffer); } RegisterCompaction(c); return c; diff --git a/db/compaction/compaction_picker_fifo.h b/db/compaction/compaction_picker_fifo.h index 23f08fe97e41..df21a1bde0f2 100644 --- a/db/compaction/compaction_picker_fifo.h +++ b/db/compaction/compaction_picker_fifo.h @@ -52,10 +52,9 @@ class FIFOCompactionPicker : public CompactionPicker { VersionStorageInfo* version, LogBuffer* log_buffer); - Compaction* PickCompactionToWarm(const std::string& cf_name, - const MutableCFOptions& mutable_cf_options, - const MutableDBOptions& mutable_db_options, - VersionStorageInfo* version, - LogBuffer* log_buffer); + Compaction* PickTemperatureChangeCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer); }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index c9cbb09ed379..3e6dc24bc5e2 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -1005,29 +1005,28 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { } } -TEST_F(CompactionPickerTest, FIFOToWarm1) { +TEST_F(CompactionPickerTest, FIFOToCold1) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kColdThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 100; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); uint64_t threshold_time = - static_cast(current_time) - kWarmThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); + static_cast(current_time) - kColdThreshold; + Add(0 /* level */, 4U /* file_number */, "260", "300", 1 * kFileSize, 0, 2500, + 2600, 0, true, Temperature::kUnknown, + threshold_time - 2000 /* oldest_ancestor_time */); + // Qualifies for compaction to kCold. Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, Temperature::kUnknown, threshold_time - 3000); UpdateVersionStorageInfo(); @@ -1037,33 +1036,36 @@ TEST_F(CompactionPickerTest, FIFOToWarm1) { cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); } -TEST_F(CompactionPickerTest, FIFOToWarm2) { +TEST_F(CompactionPickerTest, FIFOToCold2) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kColdThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 100; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); uint64_t threshold_time = - static_cast(current_time) - kWarmThreshold; + static_cast(current_time) - kColdThreshold; Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); + Temperature::kUnknown, threshold_time); + // The following two files qualify for compaction to kCold. Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, Temperature::kUnknown, threshold_time - 3000); Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, @@ -1075,34 +1077,40 @@ TEST_F(CompactionPickerTest, FIFOToWarm2) { cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber()); } -TEST_F(CompactionPickerTest, FIFOToWarmMaxSize) { +TEST_F(CompactionPickerTest, FIFOToColdMaxCompactionSize) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kColdThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 9; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); uint64_t threshold_time = - static_cast(current_time) - kWarmThreshold; + static_cast(current_time) - kColdThreshold; Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, Temperature::kUnknown, static_cast(current_time) - 100); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, Temperature::kUnknown, threshold_time + 100); Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, Temperature::kUnknown, threshold_time - 2000); + // The following two files qualify for compaction to kCold. + // But only the last two should be included to respect `max_compaction_bytes`. Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, Temperature::kUnknown, threshold_time - 3000); Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, @@ -1116,40 +1124,45 @@ TEST_F(CompactionPickerTest, FIFOToWarmMaxSize) { cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); } -TEST_F(CompactionPickerTest, FIFOToWarmWithExistingWarm) { +TEST_F(CompactionPickerTest, FIFOToColdWithExistingCold) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kColdThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 100; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); uint64_t threshold_time = - static_cast(current_time) - kWarmThreshold; + static_cast(current_time) - kColdThreshold; Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, Temperature::kUnknown, static_cast(current_time) - 100); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, Temperature::kUnknown, threshold_time + 100); Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, Temperature::kUnknown, threshold_time - 2000); + // The following two files qualify for compaction to kCold. Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, Temperature::kUnknown, threshold_time - 3000); Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, Temperature::kUnknown, threshold_time - 4000); Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kWarm, threshold_time - 5000); + Temperature::kCold, threshold_time - 5000); UpdateVersionStorageInfo(); ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); @@ -1157,28 +1170,32 @@ TEST_F(CompactionPickerTest, FIFOToWarmWithExistingWarm) { cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber()); } -TEST_F(CompactionPickerTest, FIFOToWarmWithOngoing) { +TEST_F(CompactionPickerTest, FIFOToColdWithHotBetweenCold) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kColdThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 100; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); uint64_t threshold_time = - static_cast(current_time) - kWarmThreshold; + static_cast(current_time) - kColdThreshold; Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, Temperature::kUnknown, static_cast(current_time) - 100); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, @@ -1186,64 +1203,78 @@ TEST_F(CompactionPickerTest, FIFOToWarmWithOngoing) { Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, Temperature::kUnknown, threshold_time - 2000); Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); + Temperature::kCold, threshold_time - 3000); + // Qualifies for compaction to kCold. Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, Temperature::kUnknown, threshold_time - 4000); Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kWarm, threshold_time - 5000); - file_map_[2].first->being_compacted = true; + Temperature::kCold, threshold_time - 5000); UpdateVersionStorageInfo(); ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); - // Stop if a file is being compacted - ASSERT_TRUE(compaction.get() == nullptr); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); } -TEST_F(CompactionPickerTest, FIFOToWarmWithHotBetweenWarms) { +TEST_F(CompactionPickerTest, FIFOToColdAndWarm) { NewVersionStorage(1, kCompactionStyleFIFO); const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 2000; + uint64_t kWarmThreshold = 10000; + uint64_t kHotThreshold = 2000; fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.age_for_warm = kWarmThreshold; + // Test that multiple threshold works. + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kHot, kHotThreshold}, {Temperature::kWarm, kWarmThreshold}}; mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; mutable_cf_options_.max_compaction_bytes = kFileSize * 100; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); int64_t current_time = 0; ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = + uint64_t hot_threshold_time = + static_cast(current_time) - kHotThreshold; + uint64_t warm_threshold_time = static_cast(current_time) - kWarmThreshold; Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, Temperature::kUnknown, static_cast(current_time) - 100); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); + Temperature::kUnknown, hot_threshold_time + 100); Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); + Temperature::kUnknown, hot_threshold_time - 200); + // Qualifies for Hot Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kWarm, threshold_time - 3000); + Temperature::kUnknown, warm_threshold_time - 100); + // Qualifies for Warm Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); + Temperature::kUnknown, warm_threshold_time - 4000); Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kWarm, threshold_time - 5000); + Temperature::kUnknown, warm_threshold_time - 5000); UpdateVersionStorageInfo(); ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), &log_buffer_)); - // Stop if a file is being compacted ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Assumes compaction picker picks older files first. + ASSERT_EQ(compaction->output_temperature(), Temperature::kWarm); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); } - TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { NewVersionStorage(6, kCompactionStyleLevel); ioptions_.compaction_pri = kMinOverlappingRatio; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index fb4c2d0ed6b6..5f5601f118cb 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6740,10 +6740,8 @@ class DBCompactionTestL0FilesMisorderCorruption : public DBCompactionTest { if (compaction_path_to_test == "FindIntraL0Compaction" || compaction_path_to_test == "CompactRange") { fifo_options.allow_compaction = true; - fifo_options.age_for_warm = 0; } else if (compaction_path_to_test == "CompactFile") { fifo_options.allow_compaction = false; - fifo_options.age_for_warm = 0; } options_.compaction_options_fifo = fifo_options; } @@ -8593,7 +8591,7 @@ TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest2) { Destroy(options); } -TEST_F(DBCompactionTest, FIFOWarm) { +TEST_F(DBCompactionTest, FIFOChangeTemperature) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleFIFO; options.num_levels = 1; @@ -8601,18 +8599,18 @@ TEST_F(DBCompactionTest, FIFOWarm) { options.level0_file_num_compaction_trigger = 2; options.create_if_missing = true; CompactionOptionsFIFO fifo_options; - fifo_options.age_for_warm = 1000; + fifo_options.file_temperature_age_thresholds = {{Temperature::kCold, 1000}}; fifo_options.max_table_files_size = 100000000; options.compaction_options_fifo = fifo_options; env_->SetMockSleep(); Reopen(options); - int total_warm = 0; + int total_cold = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "NewWritableFile::FileOptions.temperature", [&](void* arg) { Temperature temperature = *(static_cast(arg)); - if (temperature == Temperature::kWarm) { - total_warm++; + if (temperature == Temperature::kCold) { + total_cold++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); @@ -8649,9 +8647,9 @@ TEST_F(DBCompactionTest, FIFOWarm) { ASSERT_EQ(4, metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature); - ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[2].temperature); - ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[3].temperature); - ASSERT_EQ(2, total_warm); + ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[2].temperature); + ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[3].temperature); + ASSERT_EQ(2, total_cold); Destroy(options); } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index 3304c63393ba..14dd3a56bd16 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1009,6 +1009,25 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_GE(NumTableFilesAtLevel(0), 1); ASSERT_LE(NumTableFilesAtLevel(0), 5); + + // Test dynamically setting `file_temperature_age_thresholds` + ASSERT_TRUE( + dbfull() + ->GetOptions() + .compaction_options_fifo.file_temperature_age_thresholds.empty()); + ASSERT_OK(dbfull()->SetOptions({{"compaction_options_fifo", + "{file_temperature_age_thresholds={{age=10;" + "temperature=kWarm}:{age=30000;" + "temperature=kCold}}}"}})); + const auto& fifo_temp_opt = + dbfull() + ->GetOptions() + .compaction_options_fifo.file_temperature_age_thresholds; + ASSERT_EQ(fifo_temp_opt.size(), 2); + ASSERT_EQ(fifo_temp_opt[0].temperature, Temperature::kWarm); + ASSERT_EQ(fifo_temp_opt[0].age, 10); + ASSERT_EQ(fifo_temp_opt[1].temperature, Temperature::kCold); + ASSERT_EQ(fifo_temp_opt[1].age, 30000); } TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { @@ -1063,12 +1082,20 @@ TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { // ttl under compaction_options_fifo. ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", - "{allow_compaction=true;max_table_files_size=1024;ttl=731;}"}, + "{allow_compaction=true;max_table_files_size=1024;ttl=731;file_" + "temperature_age_thresholds={temperature=kCold;age=12345}}"}, {"ttl", "60"}})); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, true); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 1024); + const auto& file_temp_age = + dbfull() + ->GetOptions() + .compaction_options_fifo.file_temperature_age_thresholds; + ASSERT_EQ(file_temp_age.size(), 1); + ASSERT_EQ(file_temp_age[0].temperature, Temperature::kCold); + ASSERT_EQ(file_temp_age[0].age, 12345); ASSERT_EQ(dbfull()->GetOptions().ttl, 60); // Put ttl as the first option inside compaction_options_fifo. That works as @@ -1081,6 +1108,9 @@ TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { true); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 1024); + ASSERT_EQ(file_temp_age.size(), 1); + ASSERT_EQ(file_temp_age[0].temperature, Temperature::kCold); + ASSERT_EQ(file_temp_age[0].age, 12345); ASSERT_EQ(dbfull()->GetOptions().ttl, 191); } diff --git a/db/version_set.cc b/db/version_set.cc index cfe8d6173662..23a3cd3ad0f7 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -37,6 +37,7 @@ #include "db/pinned_iterators_manager.h" #include "db/table_cache.h" #include "db/version_builder.h" +#include "db/version_edit.h" #include "db/version_edit_handler.h" #include "table/compaction_merging_iterator.h" @@ -3282,6 +3283,55 @@ uint32_t GetExpiredTtlFilesCount(const ImmutableOptions& ioptions, } return ttl_expired_files_count; } + +bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, + const std::vector& files) { + const std::vector& ages = + mutable_cf_options.compaction_options_fifo + .file_temperature_age_thresholds; + if (ages.empty()) { + return false; + } + if (files.empty()) { + return false; + } + int64_t _current_time; + auto status = ioptions.clock->GetCurrentTime(&_current_time); + const uint64_t current_time = static_cast(_current_time); + // We use oldest_ancestor_time of a file to be the estimate age of + // the file just older than it. This is the same logic used in + // FIFOCompactionPicker::PickTemperatureChangeCompaction(). + if (status.ok() && current_time >= ages[0].age) { + uint64_t create_time_threshold = current_time - ages[0].age; + Temperature target_temp; + assert(files.size() >= 1); + for (size_t index = files.size() - 1; index >= 1; --index) { + FileMetaData* cur_file = files[index]; + FileMetaData* prev_file = files[index - 1]; + if (!cur_file->being_compacted) { + uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); + if (oldest_ancestor_time == kUnknownOldestAncesterTime) { + return false; + } + if (oldest_ancestor_time > create_time_threshold) { + return false; + } + target_temp = ages[0].temperature; + for (size_t i = 1; i < ages.size(); ++i) { + if (current_time >= ages[i].age && + oldest_ancestor_time <= current_time - ages[i].age) { + target_temp = ages[i].temperature; + } + } + if (cur_file->temperature != target_temp) { + return true; + } + } + } + } + return false; +} } // anonymous namespace void VersionStorageInfo::ComputeCompactionScore( @@ -3339,22 +3389,25 @@ void VersionStorageInfo::ComputeCompactionScore( if (compaction_style_ == kCompactionStyleFIFO) { score = static_cast(total_size) / mutable_cf_options.compaction_options_fifo.max_table_files_size; - if (mutable_cf_options.compaction_options_fifo.allow_compaction || - mutable_cf_options.compaction_options_fifo.age_for_warm > 0) { - // Warm tier move can happen at any time. It's too expensive to - // check very file's timestamp now. For now, just trigger it - // slightly more frequently than FIFO compaction so that this - // happens first. + if (score < 1 && + mutable_cf_options.compaction_options_fifo.allow_compaction) { score = std::max( static_cast(num_sorted_runs) / mutable_cf_options.level0_file_num_compaction_trigger, score); } - if (mutable_cf_options.ttl > 0) { - score = std::max( - static_cast(GetExpiredTtlFilesCount( - immutable_options, mutable_cf_options, files_[level])), - score); + if (score < 1 && mutable_cf_options.ttl > 0) { + score = + std::max(static_cast(GetExpiredTtlFilesCount( + immutable_options, mutable_cf_options, files_[0])), + score); + } + if (score < 1 && + ShouldChangeFileTemperature(immutable_options, mutable_cf_options, + files_[0])) { + // For FIFO, just need a large enough score to trigger compaction. + const double kScoreForNeedCompaction = 1.1; + score = kScoreForNeedCompaction; } } else { score = static_cast(num_sorted_runs) / diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index ff0a408958e3..4a7d1dbfa288 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -59,30 +59,6 @@ enum CompactionPri : char { kRoundRobin = 0x4, }; -struct CompactionOptionsFIFO { - // once the total sum of table files reaches this, we will delete the oldest - // table file - // Default: 1GB - uint64_t max_table_files_size; - - // If true, try to do compaction to compact smaller files into larger ones. - // Minimum files to compact follows options.level0_file_num_compaction_trigger - // and compaction won't trigger if average compact bytes per del file is - // larger than options.write_buffer_size. This is to protect large files - // from being compacted again. - // Default: false; - bool allow_compaction = false; - - // When not 0, if the data in the file is older than this threshold, RocksDB - // will soon move the file to warm temperature. - uint64_t age_for_warm = 0; - - CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {} - CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction) - : max_table_files_size(_max_table_files_size), - allow_compaction(_allow_compaction) {} -}; - // Compression options for different compression algorithms like Zlib struct CompressionOptions { // ==> BEGIN options that can be set by deprecated configuration syntax, <== @@ -225,6 +201,57 @@ enum class Temperature : uint8_t { kLastTemperature, }; +struct FileTemperatureAge { + Temperature temperature = Temperature::kUnknown; + uint64_t age = 0; +}; + +struct CompactionOptionsFIFO { + // once the total sum of table files reaches this, we will delete the oldest + // table file + // Default: 1GB + uint64_t max_table_files_size; + + // If true, try to do compaction to compact smaller files into larger ones. + // Minimum files to compact follows options.level0_file_num_compaction_trigger + // and compaction won't trigger if average compact bytes per del file is + // larger than options.write_buffer_size. This is to protect large files + // from being compacted again. + // Default: false; + bool allow_compaction = false; + + // DEPRECATED + // When not 0, if the data in the file is older than this threshold, RocksDB + // will soon move the file to warm temperature. + uint64_t age_for_warm = 0; + + // EXPERIMENTAL + // Age (in seconds) threshold for different file temperatures. + // When not empty, each element specifies an age threshold `age` and a + // temperature such that if all the data in a file is older than `age`, + // RocksDB will compact the file to the specified `temperature`. By default, + // all file has temperature kUnknown. So only temperatures other than + // kUnknown needs to be specified. + // Note: + // - The elements should be in increasing order with respect to `age` field. + // Dynamically changeable through SetOptions() API, e.g., + // SetOptions("compaction_options_fifo", + // "{file_temperature_age_thresholds={ + // {age=20;temperature=kCold}:{age=10;temperature=kWarm}}") + // In this example, all files that are at least 20 seconds old will be + // compacted and output files will have temperature kCold. All files that are + // at least 10 seconds old but younger than 20 seconds will be compacted to + // files with temperature kWarm. + // + // Default: empty + std::vector file_temperature_age_thresholds{}; + + CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {} + CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction) + : max_table_files_size(_max_table_files_size), + allow_compaction(_allow_compaction) {} +}; + // The control option of how the cache tiers will be used. Currently rocksdb // support block cache (volatile tier), secondary cache (non-volatile tier). // In the future, we may add more caching layers. diff --git a/options/cf_options.cc b/options/cf_options.cc index 0fccd5014341..ad1e669df59b 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -175,6 +175,17 @@ static std::unordered_map OptionTypeFlags::kMutable}}, }; +static std::unordered_map + file_temperature_age_type_info = { + {"temperature", + {offsetof(struct FileTemperatureAge, temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"age", + {offsetof(struct FileTemperatureAge, age), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, +}; + static std::unordered_map fifo_compaction_options_type_info = { {"max_table_files_size", @@ -192,7 +203,15 @@ static std::unordered_map {offsetof(struct CompactionOptionsFIFO, allow_compaction), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, -}; + {"file_temperature_age_thresholds", + OptionTypeInfo::Vector( + offsetof(struct CompactionOptionsFIFO, + file_temperature_age_thresholds), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + OptionTypeInfo::Struct("file_temperature_age_thresholds", + &file_temperature_age_type_info, 0, + OptionVerificationType::kNormal, + OptionTypeFlags::kMutable))}}; static std::unordered_map universal_compaction_options_type_info = { diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 6357b5e9eeac..19cb6310f9c6 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { {offsetof(struct ColumnFamilyOptions, max_bytes_for_level_multiplier_additional), sizeof(std::vector)}, + {offsetof(struct ColumnFamilyOptions, compaction_options_fifo), + sizeof(struct CompactionOptionsFIFO)}, {offsetof(struct ColumnFamilyOptions, memtable_factory), sizeof(std::shared_ptr)}, {offsetof(struct ColumnFamilyOptions, @@ -549,7 +551,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "preclude_last_level_data_seconds=86400;" "preserve_internal_time_seconds=86400;" "compaction_options_fifo={max_table_files_size=3;allow_" - "compaction=false;age_for_warm=1;};" + "compaction=true;age_for_warm=0;file_temperature_age_thresholds={{" + "temperature=kCold;age=12345}};};" "blob_cache=1M;" "memtable_protection_bytes_per_key=2;" "persist_user_defined_timestamps=true;" @@ -562,6 +565,22 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions), kColumnFamilyOptionsExcluded)); + // Custom verification since compaction_options_fifo was in + // kColumnFamilyOptionsExcluded + ASSERT_EQ(new_options->compaction_options_fifo.max_table_files_size, 3); + ASSERT_EQ(new_options->compaction_options_fifo.allow_compaction, true); + ASSERT_EQ(new_options->compaction_options_fifo.file_temperature_age_thresholds + .size(), + 1); + ASSERT_EQ( + new_options->compaction_options_fifo.file_temperature_age_thresholds[0] + .temperature, + Temperature::kCold); + ASSERT_EQ( + new_options->compaction_options_fifo.file_temperature_age_thresholds[0] + .age, + 12345); + ColumnFamilyOptions rnd_filled_options = *new_options; options->~ColumnFamilyOptions(); @@ -578,6 +597,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { {offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier_additional), sizeof(std::vector)}, + {offsetof(struct MutableCFOptions, compaction_options_fifo), + sizeof(struct CompactionOptionsFIFO)}, {offsetof(struct MutableCFOptions, compression_per_level), sizeof(std::vector)}, {offsetof(struct MutableCFOptions, max_file_size), diff --git a/options/options_test.cc b/options/options_test.cc index c22df98c62fa..ef0b5084317f 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -101,7 +101,9 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"compaction_style", "kCompactionStyleLevel"}, {"compaction_pri", "kOldestSmallestSeqFirst"}, {"verify_checksums_in_compaction", "false"}, - {"compaction_options_fifo", "23"}, + {"compaction_options_fifo", + "{allow_compaction=true;max_table_files_size=11002244;" + "file_temperature_age_thresholds={{temperature=kCold;age=12345}}}"}, {"max_sequential_skip_in_iterations", "24"}, {"inplace_update_support", "true"}, {"report_bg_io_stats", "true"}, @@ -244,7 +246,18 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, - static_cast(23)); + static_cast(11002244)); + ASSERT_EQ(new_cf_opt.compaction_options_fifo.allow_compaction, true); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds.size(), + 1); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds[0] + .temperature, + Temperature::kCold); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds[0].age, + 12345); ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, static_cast(24)); ASSERT_EQ(new_cf_opt.inplace_update_support, true); @@ -2295,7 +2308,9 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"compaction_style", "kCompactionStyleLevel"}, {"compaction_pri", "kOldestSmallestSeqFirst"}, {"verify_checksums_in_compaction", "false"}, - {"compaction_options_fifo", "23"}, + {"compaction_options_fifo", + "{allow_compaction=true;max_table_files_size=11002244;" + "file_temperature_age_thresholds={{temperature=kCold;age=12345}}}"}, {"max_sequential_skip_in_iterations", "24"}, {"inplace_update_support", "true"}, {"report_bg_io_stats", "true"}, @@ -2436,7 +2451,18 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, - static_cast(23)); + static_cast(11002244)); + ASSERT_EQ(new_cf_opt.compaction_options_fifo.allow_compaction, true); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds.size(), + 1); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds[0] + .temperature, + Temperature::kCold); + ASSERT_EQ( + new_cf_opt.compaction_options_fifo.file_temperature_age_thresholds[0].age, + 12345); ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, static_cast(24)); ASSERT_EQ(new_cf_opt.inplace_update_support, true);