From f866e7651f8345f90772a1c10b3ecd3a3ae28328 Mon Sep 17 00:00:00 2001 From: Zhangmei Li Date: Fri, 14 Jun 2019 02:17:34 +0800 Subject: [PATCH] add cache options for rocksdb Change-Id: Icc4ae746f3117905960b8de52ae59386e0684584 --- hugegraph-core/pom.xml | 2 +- .../backend/store/rocksdb/RocksDBOptions.java | 109 ++++++++++++++++-- .../store/rocksdb/RocksDBStdSessions.java | 56 ++++++++- 3 files changed, 151 insertions(+), 16 deletions(-) diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml index 39aebc0c63..f090647f67 100644 --- a/hugegraph-core/pom.xml +++ b/hugegraph-core/pom.xml @@ -19,7 +19,7 @@ com.baidu.hugegraph hugegraph-common - 1.6.3 + 1.6.5 diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java index 20bafeab5b..6366c1e945 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java +++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java @@ -21,9 +21,15 @@ import static com.baidu.hugegraph.config.OptionChecker.allowValues; import static com.baidu.hugegraph.config.OptionChecker.disallowEmpty; +import static com.baidu.hugegraph.config.OptionChecker.inValues; import static com.baidu.hugegraph.config.OptionChecker.rangeDouble; import static com.baidu.hugegraph.config.OptionChecker.rangeInt; +import org.rocksdb.CompactionStyle; +import org.rocksdb.CompressionType; + +import com.baidu.hugegraph.config.ConfigConvOption; +import com.baidu.hugegraph.config.ConfigListConvOption; import com.baidu.hugegraph.config.ConfigListOption; import com.baidu.hugegraph.config.ConfigOption; import com.baidu.hugegraph.config.OptionHolder; @@ -100,11 +106,12 @@ public static synchronized RocksDBOptions instance() { 7 ); - public static final ConfigOption COMPACTION_STYLE = - new ConfigOption<>( + public static final ConfigConvOption 
COMPACTION_STYLE = + new ConfigConvOption<>( "rocksdb.compaction_style", "Set compaction style for RocksDB: LEVEL/UNIVERSAL/FIFO.", allowValues("LEVEL", "UNIVERSAL", "FIFO"), + CompactionStyle::valueOf, "LEVEL" ); @@ -124,12 +131,25 @@ public static synchronized RocksDBOptions instance() { false ); - public static final ConfigOption COMPRESSION_TYPE = - new ConfigOption<>( - "rocksdb.compression_type", - "The compression algorithm of RocksDB: snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", - allowValues("snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), - "snappy" + public static final ConfigListConvOption COMPRESSION_TYPES = + new ConfigListConvOption<>( + "rocksdb.compression_types", + "The compression algorithms for different levels of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + inValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + String.class, + "", "", "snappy", "snappy", "snappy", "snappy", "snappy" + ); + + public static final ConfigConvOption BOTTOMMOST_COMPACTION_TYPE = + new ConfigConvOption<>( + "rocksdb.bottommost_level_compression_type", + "The compression algorithm for the bottommost level of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + "" ); public static final ConfigOption MAX_BG_COMPACTIONS = @@ -205,6 +225,17 @@ public static synchronized RocksDBOptions instance() { 0 ); + + public static final ConfigConvOption MEMTABLE_COMPRESSION_TYPE = + new ConfigConvOption<>( + "rocksdb.memtable_compression_type", + "The compression algorithm for write buffers of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + "snappy" + ); + public static final ConfigOption 
MAX_LEVEL1_BYTES = new ConfigOption<>( "rocksdb.max_bytes_for_level_base", @@ -265,7 +296,67 @@ public static synchronized RocksDBOptions instance() { public static final ConfigOption USE_DIRECT_READS_WRITES_FC = new ConfigOption<>( "rocksdb.use_direct_io_for_flush_and_compaction", - "Enable the OS to use direct reads and writes in flush and compaction.", + "Enable the OS to use direct read/writes in flush and compaction.", + disallowEmpty(), + false + ); + + public static final ConfigOption BLOCK_CACHE_CAPACITY = + new ConfigOption<>( + "rocksdb.block_cache_capacity", + "The amount of block cache in bytes that will be used by RocksDB, " + + "0 means no block cache.", + rangeInt(0L, Long.MAX_VALUE), + 8L * Bytes.MB + ); + + public static final ConfigOption PIN_L0_FILTER_AND_INDEX_IN_CACHE = + new ConfigOption<>( + "rocksdb.pin_l0_filter_and_index_blocks_in_cache", + "Indicating if we'd pin L0 index/filter blocks to the block cache.", + disallowEmpty(), + false + ); + + public static final ConfigOption PUT_FILTER_AND_INDEX_IN_CACHE = + new ConfigOption<>( + "rocksdb.cache_index_and_filter_blocks", + "Indicating if we'd put index/filter blocks to the block cache.", + disallowEmpty(), + false + ); + + public static final ConfigOption BLOOM_FILTER_BITS_PER_KEY = + new ConfigOption<>( + "rocksdb.bloom_filter_bits_per_key", + "The bits per key in bloom filter, a good value is 10, " + + "which yields a filter with ~ 1% false positive rate, " + + "-1 means no bloom filter.", + rangeInt(-1, Integer.MAX_VALUE), + -1 + ); + + public static final ConfigOption BLOOM_FILTER_MODE = + new ConfigOption<>( + "rocksdb.bloom_filter_block_based_mode", + "Use block based filter rather than full filter.", + disallowEmpty(), + false + ); + + public static final ConfigOption BLOOM_FILTER_WHOLE_KEY = + new ConfigOption<>( + "rocksdb.bloom_filter_whole_key_filtering", + "True if place whole keys in the bloom filter, " + + "else place the prefix of keys.", + disallowEmpty(), + true + ); + + 
public static final ConfigOption BLOOM_FILTERS_SKIP_LAST_LEVEL = + new ConfigOption<>( + "rocksdb.optimize_filters_for_hits", + "This flag allows us to not store filters for the last level.", disallowEmpty(), false ); diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java index ed0c4f6875..0b1638caf6 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java +++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java @@ -30,11 +30,12 @@ import java.util.NoSuchElementException; import java.util.Set; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.BloomFilter; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.ColumnFamilyOptions; import org.rocksdb.ColumnFamilyOptionsInterface; -import org.rocksdb.CompactionStyle; import org.rocksdb.CompressionType; import org.rocksdb.DBOptions; import org.rocksdb.DBOptionsInterface; @@ -318,22 +319,65 @@ public static void initOptions(HugeConfig conf, cf.optimizeUniversalStyleCompaction(); } - cf.setNumLevels(conf.get(RocksDBOptions.NUM_LEVELS)); - cf.setCompactionStyle(CompactionStyle.valueOf( - conf.get(RocksDBOptions.COMPACTION_STYLE))); + int numLevels = conf.get(RocksDBOptions.NUM_LEVELS); + List compressions = + conf.get(RocksDBOptions.COMPRESSION_TYPES); + E.checkArgument(compressions.isEmpty() || + compressions.size() == numLevels, + "Elements number of '%s' must be the same as '%s'" + + ", but got %s != %s", + RocksDBOptions.COMPRESSION_TYPES.name(), + RocksDBOptions.NUM_LEVELS.name(), + compressions.size(), numLevels); + + cf.setNumLevels(numLevels); + cf.setCompactionStyle(conf.get(RocksDBOptions.COMPACTION_STYLE)); + + cf.setBottommostCompressionType( + conf.get(RocksDBOptions.BOTTOMMOST_COMPACTION_TYPE)); + 
if (!compressions.isEmpty()) { + cf.setCompressionPerLevel(compressions); + } cf.setMinWriteBufferNumberToMerge( conf.get(RocksDBOptions.MIN_MEMTABLES_TO_MERGE)); cf.setMaxWriteBufferNumberToMaintain( conf.get(RocksDBOptions.MAX_MEMTABLES_TO_MAINTAIN)); + // https://github.com/facebook/rocksdb/wiki/Block-Cache + BlockBasedTableConfig tableConfig = new BlockBasedTableConfig(); + long cacheCapacity = conf.get(RocksDBOptions.BLOCK_CACHE_CAPACITY); + if (cacheCapacity <= 0L) { + // Bypassing bug https://github.com/facebook/rocksdb/pull/5465 + tableConfig.setNoBlockCache(true); + } else { + tableConfig.setBlockCacheSize(cacheCapacity); + } + tableConfig.setPinL0FilterAndIndexBlocksInCache( + conf.get(RocksDBOptions.PIN_L0_FILTER_AND_INDEX_IN_CACHE)); + tableConfig.setCacheIndexAndFilterBlocks( + conf.get(RocksDBOptions.PUT_FILTER_AND_INDEX_IN_CACHE)); + + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + int bitsPerKey = conf.get(RocksDBOptions.BLOOM_FILTER_BITS_PER_KEY); + if (bitsPerKey >= 0) { + boolean blockBased = conf.get(RocksDBOptions.BLOOM_FILTER_MODE); + tableConfig.setFilter(new BloomFilter(bitsPerKey, blockBased)); + } + tableConfig.setWholeKeyFiltering( + conf.get(RocksDBOptions.BLOOM_FILTER_WHOLE_KEY)); + cf.setTableFormatConfig(tableConfig); + + cf.setOptimizeFiltersForHits( + conf.get(RocksDBOptions.BLOOM_FILTERS_SKIP_LAST_LEVEL)); + // https://github.com/facebook/rocksdb/tree/master/utilities/merge_operators cf.setMergeOperatorName("uint64add"); // uint64add/stringappend } if (mcf != null) { - mcf.setCompressionType(CompressionType.getCompressionType( - conf.get(RocksDBOptions.COMPRESSION_TYPE))); + mcf.setCompressionType( + conf.get(RocksDBOptions.MEMTABLE_COMPRESSION_TYPE)); mcf.setWriteBufferSize( conf.get(RocksDBOptions.MEMTABLE_SIZE));