From f866e7651f8345f90772a1c10b3ecd3a3ae28328 Mon Sep 17 00:00:00 2001 From: Zhangmei Li Date: Fri, 14 Jun 2019 02:17:34 +0800 Subject: [PATCH 1/2] add cache options for rocksdb Change-Id: Icc4ae746f3117905960b8de52ae59386e0684584 --- hugegraph-core/pom.xml | 2 +- .../backend/store/rocksdb/RocksDBOptions.java | 109 ++++++++++++++++-- .../store/rocksdb/RocksDBStdSessions.java | 56 ++++++++- 3 files changed, 151 insertions(+), 16 deletions(-) diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml index 39aebc0c63..f090647f67 100644 --- a/hugegraph-core/pom.xml +++ b/hugegraph-core/pom.xml @@ -19,7 +19,7 @@ com.baidu.hugegraph hugegraph-common - 1.6.3 + 1.6.5 diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java index 20bafeab5b..6366c1e945 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java +++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java @@ -21,9 +21,15 @@ import static com.baidu.hugegraph.config.OptionChecker.allowValues; import static com.baidu.hugegraph.config.OptionChecker.disallowEmpty; +import static com.baidu.hugegraph.config.OptionChecker.inValues; import static com.baidu.hugegraph.config.OptionChecker.rangeDouble; import static com.baidu.hugegraph.config.OptionChecker.rangeInt; +import org.rocksdb.CompactionStyle; +import org.rocksdb.CompressionType; + +import com.baidu.hugegraph.config.ConfigConvOption; +import com.baidu.hugegraph.config.ConfigListConvOption; import com.baidu.hugegraph.config.ConfigListOption; import com.baidu.hugegraph.config.ConfigOption; import com.baidu.hugegraph.config.OptionHolder; @@ -100,11 +106,12 @@ public static synchronized RocksDBOptions instance() { 7 ); - public static final ConfigOption COMPACTION_STYLE = - new ConfigOption<>( + public static final ConfigConvOption 
COMPACTION_STYLE = + new ConfigConvOption<>( "rocksdb.compaction_style", "Set compaction style for RocksDB: LEVEL/UNIVERSAL/FIFO.", allowValues("LEVEL", "UNIVERSAL", "FIFO"), + CompactionStyle::valueOf, "LEVEL" ); @@ -124,12 +131,25 @@ public static synchronized RocksDBOptions instance() { false ); - public static final ConfigOption COMPRESSION_TYPE = - new ConfigOption<>( - "rocksdb.compression_type", - "The compression algorithm of RocksDB: snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", - allowValues("snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), - "snappy" + public static final ConfigListConvOption COMPRESSION_TYPES = + new ConfigListConvOption<>( + "rocksdb.compression_types", + "The compression algorithms for different levels of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + inValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + String.class, + "", "", "snappy", "snappy", "snappy", "snappy", "snappy" + ); + + public static final ConfigConvOption BOTTOMMOST_COMPACTION_TYPE = + new ConfigConvOption<>( + "rocksdb.bottommost_level_compression_type", + "The compression algorithm for the bottommost level of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + "" ); public static final ConfigOption MAX_BG_COMPACTIONS = @@ -205,6 +225,17 @@ public static synchronized RocksDBOptions instance() { 0 ); + + public static final ConfigConvOption MEMTABLE_COMPRESSION_TYPE = + new ConfigConvOption<>( + "rocksdb.memtable_compression_type", + "The compression algorithm for write buffers of RocksDB, " + + "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + "snappy" + ); + public static final ConfigOption 
MAX_LEVEL1_BYTES = new ConfigOption<>( "rocksdb.max_bytes_for_level_base", @@ -265,7 +296,67 @@ public static synchronized RocksDBOptions instance() { public static final ConfigOption USE_DIRECT_READS_WRITES_FC = new ConfigOption<>( "rocksdb.use_direct_io_for_flush_and_compaction", - "Enable the OS to use direct reads and writes in flush and compaction.", + "Enable the OS to use direct read/writes in flush and compaction.", disallowEmpty(), false ); + + public static final ConfigOption BLOCK_CACHE_CAPACITY = + new ConfigOption<>( + "rocksdb.block_cache_capacity", + "The amount of block cache in bytes that will be used by RocksDB, " + + "0 means no block cache.", + rangeInt(0L, Long.MAX_VALUE), + 8L * Bytes.MB + ); + + public static final ConfigOption PIN_L0_FILTER_AND_INDEX_IN_CACHE = + new ConfigOption<>( + "rocksdb.pin_l0_filter_and_index_blocks_in_cache", + "Indicating if we'd pin L0 index/filter blocks to the block cache.", + disallowEmpty(), + false + ); + + public static final ConfigOption PUT_FILTER_AND_INDEX_IN_CACHE = + new ConfigOption<>( + "rocksdb.cache_index_and_filter_blocks", + "Indicating if we'd put index/filter blocks to the block cache.", + disallowEmpty(), + false + ); + + public static final ConfigOption BLOOM_FILTER_BITS_PER_KEY = + new ConfigOption<>( + "rocksdb.bloom_filter_bits_per_key", + "The bits per key in bloom filter, a good value is 10, " + + "which yields a filter with ~ 1% false positive rate, " + + "-1 means no bloom filter.", + rangeInt(-1, Integer.MAX_VALUE), + -1 + ); + + public static final ConfigOption BLOOM_FILTER_MODE = + new ConfigOption<>( + "rocksdb.bloom_filter_block_based_mode", + "Use block based filter rather than full filter.", + disallowEmpty(), + false + ); + + public static final ConfigOption BLOOM_FILTER_WHOLE_KEY = + new ConfigOption<>( + "rocksdb.bloom_filter_whole_key_filtering", + "True if place whole keys in the bloom filter, " + + "else place the prefix of keys.", + disallowEmpty(), + true + ); + + 
public static final ConfigOption BLOOM_FILTERS_SKIP_LAST_LEVEL = + new ConfigOption<>( + "rocksdb.optimize_filters_for_hits", + "This flag allows us to not store filters for the last level.", disallowEmpty(), false ); diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java index ed0c4f6875..0b1638caf6 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java +++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java @@ -30,11 +30,12 @@ import java.util.NoSuchElementException; import java.util.Set; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.BloomFilter; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.ColumnFamilyOptions; import org.rocksdb.ColumnFamilyOptionsInterface; -import org.rocksdb.CompactionStyle; import org.rocksdb.CompressionType; import org.rocksdb.DBOptions; import org.rocksdb.DBOptionsInterface; @@ -318,22 +319,65 @@ public static void initOptions(HugeConfig conf, cf.optimizeUniversalStyleCompaction(); } - cf.setNumLevels(conf.get(RocksDBOptions.NUM_LEVELS)); - cf.setCompactionStyle(CompactionStyle.valueOf( - conf.get(RocksDBOptions.COMPACTION_STYLE))); + int numLevels = conf.get(RocksDBOptions.NUM_LEVELS); + List compressions = + conf.get(RocksDBOptions.COMPRESSION_TYPES); + E.checkArgument(compressions.isEmpty() || + compressions.size() == numLevels, + "Elements number of '%s' must be the same as '%s'" + + ", bug got %s != %s", + RocksDBOptions.COMPRESSION_TYPES.name(), + RocksDBOptions.NUM_LEVELS.name(), + compressions.size(), numLevels); + + cf.setNumLevels(numLevels); + cf.setCompactionStyle(conf.get(RocksDBOptions.COMPACTION_STYLE)); + + cf.setBottommostCompressionType( + conf.get(RocksDBOptions.BOTTOMMOST_COMPACTION_TYPE)); + 
if (!compressions.isEmpty()) { + cf.setCompressionPerLevel(compressions); + } cf.setMinWriteBufferNumberToMerge( conf.get(RocksDBOptions.MIN_MEMTABLES_TO_MERGE)); cf.setMaxWriteBufferNumberToMaintain( conf.get(RocksDBOptions.MAX_MEMTABLES_TO_MAINTAIN)); + // https://github.com/facebook/rocksdb/wiki/Block-Cache + BlockBasedTableConfig tableConfig = new BlockBasedTableConfig(); + long cacheCapacity = conf.get(RocksDBOptions.BLOCK_CACHE_CAPACITY); + if (cacheCapacity <= 0L) { + // Bypassing bug https://github.com/facebook/rocksdb/pull/5465 + tableConfig.setNoBlockCache(true); + } else { + tableConfig.setBlockCacheSize(cacheCapacity); + } + tableConfig.setPinL0FilterAndIndexBlocksInCache( + conf.get(RocksDBOptions.PIN_L0_FILTER_AND_INDEX_IN_CACHE)); + tableConfig.setCacheIndexAndFilterBlocks( + conf.get(RocksDBOptions.PUT_FILTER_AND_INDEX_IN_CACHE)); + + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + int bitsPerKey = conf.get(RocksDBOptions.BLOOM_FILTER_BITS_PER_KEY); + if (bitsPerKey >= 0) { + boolean blockBased = conf.get(RocksDBOptions.BLOOM_FILTER_MODE); + tableConfig.setFilter(new BloomFilter(bitsPerKey, blockBased)); + } + tableConfig.setWholeKeyFiltering( + conf.get(RocksDBOptions.BLOOM_FILTER_WHOLE_KEY)); + cf.setTableFormatConfig(tableConfig); + + cf.setOptimizeFiltersForHits( + conf.get(RocksDBOptions.BLOOM_FILTERS_SKIP_LAST_LEVEL)); + // https://github.com/facebook/rocksdb/tree/master/utilities/merge_operators cf.setMergeOperatorName("uint64add"); // uint64add/stringappend } if (mcf != null) { - mcf.setCompressionType(CompressionType.getCompressionType( - conf.get(RocksDBOptions.COMPRESSION_TYPE))); + mcf.setCompressionType( + conf.get(RocksDBOptions.MEMTABLE_COMPRESSION_TYPE)); mcf.setWriteBufferSize( conf.get(RocksDBOptions.MEMTABLE_SIZE)); From a1b08c08f24b03c82f709631e071d015282f744d Mon Sep 17 00:00:00 2001 From: Zhangmei Li Date: Tue, 18 Jun 2019 21:10:07 +0800 Subject: [PATCH 2/2] tiny improve Change-Id: 
I4eb88b658165409516ecb9ee94ba8bec2ffc21fd --- .../com/baidu/hugegraph/cmd/ConfDumper.java | 6 +-- .../backend/store/rocksdb/RocksDBOptions.java | 41 +++++++++---------- .../store/rocksdb/RocksDBStdSessions.java | 21 ++++------ 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/hugegraph-dist/src/main/java/com/baidu/hugegraph/cmd/ConfDumper.java b/hugegraph-dist/src/main/java/com/baidu/hugegraph/cmd/ConfDumper.java index fd98075333..6e2a114a6f 100644 --- a/hugegraph-dist/src/main/java/com/baidu/hugegraph/cmd/ConfDumper.java +++ b/hugegraph-dist/src/main/java/com/baidu/hugegraph/cmd/ConfDumper.java @@ -26,9 +26,9 @@ import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.io.FileUtils; -import com.baidu.hugegraph.config.ConfigOption; import com.baidu.hugegraph.config.HugeConfig; import com.baidu.hugegraph.config.OptionSpace; +import com.baidu.hugegraph.config.TypedOption; import com.baidu.hugegraph.dist.RegisterUtil; import com.baidu.hugegraph.util.E; @@ -52,12 +52,12 @@ public static void main(String[] args) HugeConfig config = new HugeConfig(input); for (String name : new TreeSet<>(OptionSpace.keys())) { - ConfigOption option = OptionSpace.get(name); + TypedOption option = OptionSpace.get(name); writeOption(output, option, config.get(option)); } } - private static void writeOption(File output, ConfigOption option, + private static void writeOption(File output, TypedOption option, Object value) throws IOException { StringBuilder sb = new StringBuilder(); sb.append("# ").append(option.desc()).append(EOL); diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java index 6366c1e945..fd1cc29bb3 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java +++ 
b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java @@ -131,25 +131,35 @@ public static synchronized RocksDBOptions instance() { false ); - public static final ConfigListConvOption COMPRESSION_TYPES = + public static final ConfigListConvOption LEVELS_COMPRESSIONS = new ConfigListConvOption<>( - "rocksdb.compression_types", + "rocksdb.compression_per_level", "The compression algorithms for different levels of RocksDB, " + - "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", - inValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + "allowed values are none/snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + inValues("none", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), CompressionType::getCompressionType, String.class, - "", "", "snappy", "snappy", "snappy", "snappy", "snappy" + "none", "none", "snappy", "snappy", "snappy", "snappy", "snappy" ); - public static final ConfigConvOption BOTTOMMOST_COMPACTION_TYPE = + public static final ConfigConvOption BOTTOMMOST_COMPRESSION = new ConfigConvOption<>( - "rocksdb.bottommost_level_compression_type", + "rocksdb.bottommost_compression", "The compression algorithm for the bottommost level of RocksDB, " + - "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", - allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + "allowed values are none/snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("none", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), CompressionType::getCompressionType, - "" + "none" + ); + + public static final ConfigConvOption COMPRESSION = + new ConfigConvOption<>( + "rocksdb.compression", + "The compression algorithm for compressing blocks of RocksDB, " + + "allowed values are none/snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", + allowValues("none", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), + CompressionType::getCompressionType, + "snappy" ); public static final ConfigOption 
MAX_BG_COMPACTIONS = @@ -225,17 +235,6 @@ public static synchronized RocksDBOptions instance() { 0 ); - - public static final ConfigConvOption MEMTABLE_COMPRESSION_TYPE = - new ConfigConvOption<>( - "rocksdb.memtable_compression_type", - "The compression algorithm for write buffers of RocksDB, " + - "allowed compressions are snappy/z/bzip2/lz4/lz4hc/xpress/zstd.", - allowValues("", "snappy", "z", "bzip2", "lz4", "lz4hc", "xpress", "zstd"), - CompressionType::getCompressionType, - "snappy" - ); - public static final ConfigOption MAX_LEVEL1_BYTES = new ConfigOption<>( "rocksdb.max_bytes_for_level_base", diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java index 0b1638caf6..9784584c48 100644 --- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java +++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java @@ -320,13 +320,13 @@ public static void initOptions(HugeConfig conf, } int numLevels = conf.get(RocksDBOptions.NUM_LEVELS); - List compressions = - conf.get(RocksDBOptions.COMPRESSION_TYPES); + List compressions = conf.get( + RocksDBOptions.LEVELS_COMPRESSIONS); E.checkArgument(compressions.isEmpty() || compressions.size() == numLevels, - "Elements number of '%s' must be the same as '%s'" + - ", bug got %s != %s", - RocksDBOptions.COMPRESSION_TYPES.name(), + "Elements number of '%s' must be 0 or " + + "be the same as '%s', but got %s != %s", + RocksDBOptions.LEVELS_COMPRESSIONS.name(), RocksDBOptions.NUM_LEVELS.name(), compressions.size(), numLevels); @@ -334,7 +334,7 @@ public static void initOptions(HugeConfig conf, cf.setCompactionStyle(conf.get(RocksDBOptions.COMPACTION_STYLE)); cf.setBottommostCompressionType( - conf.get(RocksDBOptions.BOTTOMMOST_COMPACTION_TYPE)); + conf.get(RocksDBOptions.BOTTOMMOST_COMPRESSION)); 
if (!compressions.isEmpty()) { cf.setCompressionPerLevel(compressions); } @@ -376,13 +376,10 @@ public static void initOptions(HugeConfig conf, } if (mcf != null) { - mcf.setCompressionType( - conf.get(RocksDBOptions.MEMTABLE_COMPRESSION_TYPE)); + mcf.setCompressionType(conf.get(RocksDBOptions.COMPRESSION)); - mcf.setWriteBufferSize( - conf.get(RocksDBOptions.MEMTABLE_SIZE)); - mcf.setMaxWriteBufferNumber( - conf.get(RocksDBOptions.MAX_MEMTABLES)); + mcf.setWriteBufferSize(conf.get(RocksDBOptions.MEMTABLE_SIZE)); + mcf.setMaxWriteBufferNumber(conf.get(RocksDBOptions.MAX_MEMTABLES)); mcf.setMaxBytesForLevelBase( conf.get(RocksDBOptions.MAX_LEVEL1_BYTES));