forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rocksdb: Bump version to 4.9 and fix broken interface usage
They changed a couple parts of their interfaces that broke our code. For now, I've taken the hideous approach of copying the old logic to avoid modifying the options that we end up using, but will look for a better solution tomorrow (and have asked their maintainers for a recommendation as well on https://reviews.facebook.net/D59319). Rushing this a bit because we think that 4.9 might fix a deadlock in rocksdb that's regularly affecting us on GCE, and we'd like to verify that assumption tomorrow morning and get it into this week's beta release if it helps things.
- Loading branch information
1 parent
514206e
commit a5ce7dd
Showing
4 changed files
with
247 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. An additional grant | ||
// of patent rights can be found in the PATENTS file in the same directory. | ||
// | ||
// TODO(a-robinson,pmattis): Clean up or remove this logic. For the sake of | ||
// stability in the very short term, it's just been copied over verbatim | ||
// from the rocksdb code that was removed from their repo in | ||
// https://github.com/facebook/rocksdb/commit/b2973eaaebbaa2b410fd808dc2b1578ca8ab0c07 | ||
// I've asked for recommendations in the original code review at | ||
// https://reviews.facebook.net/D59319 | ||
|
||
#include <math.h> | ||
#include <cmath> | ||
#include <algorithm> | ||
#include "rocksdb/options.h" | ||
|
||
namespace rocksdb { | ||
|
||
namespace { | ||
|
||
// Level bytes multiplier. For now this is always 10. It is assigned to | ||
// max_bytes_for_level_multiplier and is also the logarithm base used when | ||
// estimating how many levels a DB of a given size needs. | ||
const int kBytesForLevelMultiplier = 10; | ||
// Number of bytes in one megabyte (1024 * 1024). | ||
const size_t kBytesForOneMb = 1024 * 1024; | ||
|
||
// Chooses between level-style and universal-style compaction.
//
// The read and write amplification of both styles are estimated from the
// ratio of target_db_size to write_buffer_size. If one of the thresholds acts
// as a hard limit that forces the choice, that choice is made. Otherwise each
// style gets a score computed from the thresholds and the estimates, with
// reads weighted 4x as important as writes, and the higher score wins.
//
//   write_buffer_size:   size of a single mem table, in bytes.
//   read_amp_threshold:  comfortable value of read amplification.
//   write_amp_threshold: comfortable value of write amplification.
//   target_db_size:      estimated total DB size, in bytes.
//
// In ROCKSDB_LITE builds this always returns kCompactionStyleLevel.
CompactionStyle PickCompactionStyle(size_t write_buffer_size,
                                    int read_amp_threshold,
                                    int write_amp_threshold,
                                    uint64_t target_db_size) {
#ifndef ROCKSDB_LITE
  // Number of write buffers needed to hold the whole DB. Clamp it to at least
  // 1: a zero write_buffer_size would otherwise divide by zero, and a DB that
  // is smaller than one write buffer would produce log(0) == -inf, whose
  // conversion to int is undefined behavior.
  const uint64_t buffers_in_db =
      write_buffer_size == 0
          ? 1
          : std::max<uint64_t>(1, target_db_size / write_buffer_size);
  const double buffers_in_db_fp = static_cast<double>(buffers_in_db);

  int expected_levels = static_cast<int>(std::ceil(
      std::log(buffers_in_db_fp) / std::log(kBytesForLevelMultiplier)));

  int expected_max_files_universal =
      static_cast<int>(std::ceil(std::log2(buffers_in_db_fp)));

  const int kEstimatedLevel0FilesInLevelStyle = 2;
  // Estimate write amplification:
  // (1) 1 for every L0 file
  // (2) 2 for L1
  // (3) kBytesForLevelMultiplier for the last level. It's really hard to
  //     predict.
  // (4) kBytesForLevelMultiplier for other levels.
  int expected_write_amp_level = kEstimatedLevel0FilesInLevelStyle + 2 +
                                 (expected_levels - 2) * kBytesForLevelMultiplier +
                                 kBytesForLevelMultiplier;
  int expected_read_amp_level =
      kEstimatedLevel0FilesInLevelStyle + expected_levels;

  // Hard limits: the read threshold does not exceed the estimated maximum
  // number of files under universal style, or the write threshold does not
  // exceed the estimated write amplification of level style.
  int max_read_amp_uni = expected_max_files_universal;
  if (read_amp_threshold <= max_read_amp_uni) {
    return kCompactionStyleLevel;
  } else if (write_amp_threshold <= expected_write_amp_level) {
    return kCompactionStyleUniversal;
  }

  // No hard limit applies: score both styles. A larger score means the
  // thresholds leave more headroom over the estimated amplification.
  const double kReadWriteWeight = 4;

  double level_ratio =
      static_cast<double>(read_amp_threshold) / expected_read_amp_level *
          kReadWriteWeight +
      static_cast<double>(write_amp_threshold) / expected_write_amp_level;

  int expected_write_amp_uni = expected_max_files_universal / 2 + 2;
  int expected_read_amp_uni = expected_max_files_universal / 2 + 1;

  double uni_ratio =
      static_cast<double>(read_amp_threshold) / expected_read_amp_uni *
          kReadWriteWeight +
      static_cast<double>(write_amp_threshold) / expected_write_amp_uni;

  if (level_ratio > uni_ratio) {
    return kCompactionStyleLevel;
  } else {
    return kCompactionStyleUniversal;
  }
#else
  return kCompactionStyleLevel;
#endif  // !ROCKSDB_LITE
}
|
||
// Pick mem table size | ||
void PickWriteBufferSize(size_t total_write_buffer_limit, Options* options) { | ||
const size_t kMaxWriteBufferSize = 128 * kBytesForOneMb; | ||
const size_t kMinWriteBufferSize = 4 * kBytesForOneMb; | ||
|
||
// Try to pick up a buffer size between 4MB and 128MB. | ||
// And try to pick 4 as the total number of write buffers. | ||
size_t write_buffer_size = total_write_buffer_limit / 4; | ||
if (write_buffer_size > kMaxWriteBufferSize) { | ||
write_buffer_size = kMaxWriteBufferSize; | ||
} else if (write_buffer_size < kMinWriteBufferSize) { | ||
write_buffer_size = std::min(static_cast<size_t>(kMinWriteBufferSize), | ||
total_write_buffer_limit / 2); | ||
} | ||
|
||
// Truncate to multiple of 1MB. | ||
if (write_buffer_size % kBytesForOneMb != 0) { | ||
write_buffer_size = | ||
(write_buffer_size / kBytesForOneMb + 1) * kBytesForOneMb; | ||
} | ||
|
||
options->write_buffer_size = write_buffer_size; | ||
options->max_write_buffer_number = | ||
static_cast<int>(total_write_buffer_limit / write_buffer_size); | ||
options->min_write_buffer_number_to_merge = 1; | ||
} | ||
|
||
#ifndef ROCKSDB_LITE
// Applies the fixed level-0 triggers and max_open_files value used when
// universal-style compaction has been selected. `options` must not be null.
void OptimizeForUniversal(Options* options) {
  Options& tuned = *options;
  tuned.max_open_files = -1;
  // Level-0 file counts at which writes are stopped, writes are slowed down,
  // and compaction is triggered, respectively.
  tuned.level0_stop_writes_trigger = 40;
  tuned.level0_slowdown_writes_trigger = 30;
  tuned.level0_file_num_compaction_trigger = 2;
}
#endif  // !ROCKSDB_LITE
|
||
// Optimize parameters for level-based compaction | ||
void OptimizeForLevel(int read_amplification_threshold, | ||
int write_amplification_threshold, | ||
uint64_t target_db_size, Options* options) { | ||
int expected_levels_one_level0_file = | ||
static_cast<int>(ceil(std::log(target_db_size / options->write_buffer_size) / | ||
std::log(kBytesForLevelMultiplier))); | ||
|
||
int level0_stop_writes_trigger = | ||
read_amplification_threshold - expected_levels_one_level0_file; | ||
|
||
const size_t kInitialLevel0TotalSize = 128 * kBytesForOneMb; | ||
const int kMaxFileNumCompactionTrigger = 4; | ||
const int kMinLevel0StopTrigger = 3; | ||
|
||
int file_num_buffer = static_cast<int>( | ||
kInitialLevel0TotalSize / options->write_buffer_size + 1); | ||
|
||
if (level0_stop_writes_trigger > file_num_buffer) { | ||
// Have sufficient room for multiple level 0 files | ||
// Try enlarge the buffer up to 1GB | ||
|
||
// Try to enlarge the buffer up to 1GB, if still have sufficient headroom. | ||
file_num_buffer *= | ||
1 << std::max(0, std::min(3, level0_stop_writes_trigger - | ||
file_num_buffer - 2)); | ||
|
||
options->level0_stop_writes_trigger = level0_stop_writes_trigger; | ||
options->level0_slowdown_writes_trigger = level0_stop_writes_trigger - 2; | ||
options->level0_file_num_compaction_trigger = | ||
std::min(kMaxFileNumCompactionTrigger, file_num_buffer / 2); | ||
} else { | ||
options->level0_stop_writes_trigger = | ||
std::max(kMinLevel0StopTrigger, file_num_buffer); | ||
options->level0_slowdown_writes_trigger = | ||
options->level0_stop_writes_trigger - 1; | ||
options->level0_file_num_compaction_trigger = 1; | ||
} | ||
|
||
// This doesn't consider compaction and overheads of mem tables. But usually | ||
// it is in the same order of magnitude. | ||
size_t expected_level0_compaction_size = | ||
options->level0_file_num_compaction_trigger * options->write_buffer_size; | ||
// Enlarge level1 target file size if level0 compaction size is larger. | ||
uint64_t max_bytes_for_level_base = 10 * kBytesForOneMb; | ||
if (expected_level0_compaction_size > max_bytes_for_level_base) { | ||
max_bytes_for_level_base = expected_level0_compaction_size; | ||
} | ||
options->max_bytes_for_level_base = max_bytes_for_level_base; | ||
// Now always set level multiplier to be 10 | ||
options->max_bytes_for_level_multiplier = kBytesForLevelMultiplier; | ||
|
||
const uint64_t kMinFileSize = 2 * kBytesForOneMb; | ||
// Allow at least 3-way parallelism for compaction between level 1 and 2. | ||
uint64_t max_file_size = max_bytes_for_level_base / 3; | ||
if (max_file_size < kMinFileSize) { | ||
options->target_file_size_base = kMinFileSize; | ||
} else { | ||
if (max_file_size % kBytesForOneMb != 0) { | ||
max_file_size = (max_file_size / kBytesForOneMb + 1) * kBytesForOneMb; | ||
} | ||
options->target_file_size_base = max_file_size; | ||
} | ||
|
||
// TODO: consider to tune num_levels too. | ||
} | ||
|
||
} // namespace | ||
|
||
// Builds an Options object whose flush and compaction parameters are tuned
// from the given guidelines; everything else keeps its default value.
//
// The steps are: pick the write buffer size and count from the mem table
// budget, pick a compaction style, then apply the tuning that matches the
// chosen style. In ROCKSDB_LITE builds the level-style tuning is always
// applied.
//
//   total_write_buffer_limit:      budget for memory spent on mem tables.
//   read_amplification_threshold:  comfortable value of read amplification.
//   write_amplification_threshold: comfortable value of write amplification.
//   target_db_size:                estimated total DB size.
Options GetOptions(size_t total_write_buffer_limit,
                   int read_amplification_threshold,
                   int write_amplification_threshold, uint64_t target_db_size) {
  Options tuned;
  PickWriteBufferSize(total_write_buffer_limit, &tuned);

  tuned.compaction_style = PickCompactionStyle(
      tuned.write_buffer_size, read_amplification_threshold,
      write_amplification_threshold, target_db_size);

#ifndef ROCKSDB_LITE
  // Universal-style tuning only exists in non-LITE builds.
  if (tuned.compaction_style == kCompactionStyleUniversal) {
    OptimizeForUniversal(&tuned);
    return tuned;
  }
#endif  // !ROCKSDB_LITE

  OptimizeForLevel(read_amplification_threshold, write_amplification_threshold,
                   target_db_size, &tuned);
  return tuned;
}
|
||
} // namespace rocksdb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. An additional grant | ||
// of patent rights can be found in the PATENTS file in the same directory. | ||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. See the AUTHORS file for names of contributors. | ||
|
||
#ifndef STORAGE_ROCKSDB_INCLUDE_OPTIONS_BUILDER_H_ | ||
#define STORAGE_ROCKSDB_INCLUDE_OPTIONS_BUILDER_H_ | ||
|
||
#include "rocksdb/options.h" | ||
|
||
namespace rocksdb { | ||
|
||
// Builds an Options object based on a few guidelines. Only the parameters | ||
// related to flush and compaction are tuned; all other parameters are left | ||
// at their default values. | ||
//   total_write_buffer_limit: budget for memory spent on mem tables. | ||
//   read_amplification_threshold: comfortable value of read amplification | ||
//     (defaults to 8). | ||
//   write_amplification_threshold: comfortable value of write amplification | ||
//     (defaults to 32). | ||
//   target_db_size: estimated total DB size (defaults to 64GB). | ||
extern Options GetOptions(size_t total_write_buffer_limit, | ||
int read_amplification_threshold = 8, | ||
int write_amplification_threshold = 32, | ||
uint64_t target_db_size = 68719476736 /* 64GB */); | ||
|
||
|
||
} // namespace rocksdb | ||
|
||
#endif // STORAGE_ROCKSDB_INCLUDE_OPTIONS_BUILDER_H_ |