diff --git a/hugegraph-rocksdb/pom.xml b/hugegraph-rocksdb/pom.xml
index 207dd7c34d..dbcfc66c07 100644
--- a/hugegraph-rocksdb/pom.xml
+++ b/hugegraph-rocksdb/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
- <version>6.10.2</version>
+ <version>7.2.2</version>
diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java
index f0a4df24e8..0bd4667ff2 100644
--- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java
+++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBOptions.java
@@ -27,6 +27,8 @@
import org.rocksdb.CompactionStyle;
import org.rocksdb.CompressionType;
+import org.rocksdb.DataBlockIndexType;
+import org.rocksdb.IndexType;
import com.baidu.hugegraph.config.ConfigConvOption;
import com.baidu.hugegraph.config.ConfigListConvOption;
@@ -217,6 +219,15 @@ public static synchronized RocksDBOptions instance() {
false
);
+ public static final ConfigOption<Boolean> SKIP_CHECK_SIZE_ON_DB_OPEN =
+ new ConfigOption<>(
+ "rocksdb.skip_check_sst_size_on_db_open",
+ "Whether to skip checking sizes of all sst files when " +
+ "opening the database.",
+ disallowEmpty(),
+ false
+ );
+
public static final ConfigOption<Integer> MAX_FILE_OPENING_THREADS =
new ConfigOption<>(
"rocksdb.max_file_opening_threads",
@@ -235,6 +246,39 @@ public static synchronized RocksDBOptions instance() {
0L
);
+ public static final ConfigOption<Long> BYTES_PER_SYNC =
+ new ConfigOption<>(
+ "rocksdb.bytes_per_sync",
+ "Allows OS to incrementally sync SST files to disk while " +
+ "they are being written, asynchronously in the background. " +
+ "Issue one request for every bytes_per_sync written. " +
+ "0 turns it off.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
+ public static final ConfigOption<Long> WAL_BYTES_PER_SYNC =
+ new ConfigOption<>(
+ "rocksdb.wal_bytes_per_sync",
+ "Allows OS to incrementally sync WAL files to disk while " +
+ "they are being written, asynchronously in the background. " +
+ "Issue one request for every bytes_per_sync written. " +
+ "0 turns it off.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
+ public static final ConfigOption<Boolean> STRICT_BYTES_PER_SYNC =
+ new ConfigOption<>(
+ "rocksdb.strict_bytes_per_sync",
+ "When true, guarantees SST/WAL files have at most " +
+ "bytes_per_sync/wal_bytes_per_sync bytes submitted for " +
+ "writeback at any given time. This can be used to handle " +
+ "cases where processing speed exceeds I/O speed.",
+ disallowEmpty(),
+ false
+ );
+
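For reference, a minimal sketch (not part of this patch) of how these three sync options map onto RocksDB's DBOptions; the 1MB values are illustrative, not defaults:

    try (DBOptions db = new DBOptions()) {
        db.setBytesPerSync(Bytes.MB);     // write back SST data every 1MB
        db.setWalBytesPerSync(Bytes.MB);  // same incremental writeback for WAL
        db.setStrictBytesPerSync(true);   // cap bytes submitted for writeback
    }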
public static final ConfigOption<Long> DB_MEMTABLE_SIZE =
new ConfigOption<>(
"rocksdb.db_write_buffer_size",
@@ -244,6 +288,35 @@ public static synchronized RocksDBOptions instance() {
0L
);
+ public static final ConfigOption<Long> LOG_READAHEAD_SIZE =
+ new ConfigOption<>(
+ "rocksdb.log_readahead_size",
+ "The number of bytes to prefetch when reading the log. " +
+ "0 means the prefetching is disabled.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
+ public static final ConfigOption<Long> COMPACTION_READAHEAD_SIZE =
+ new ConfigOption<>(
+ "rocksdb.compaction_readahead_size",
+ "The number of bytes to perform bigger reads when doing " +
+ "compaction. If running RocksDB on spinning disks, " +
+ "you should set this to at least 2MB. " +
+ "0 means the prefetching is disabled.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
+ public static final ConfigOption<Long> ROW_CACHE_CAPACITY =
+ new ConfigOption<>(
+ "rocksdb.row_cache_capacity",
+ "The capacity in bytes of global cache for table-level rows. " +
+ "0 means the row_cache is disabled.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
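For reference, a sketch of enabling the row cache, mirroring what initOptions() does below when rocksdb.row_cache_capacity > 0; the 64MB capacity is illustrative:

    try (DBOptions db = new DBOptions()) {
        // A global LRU cache of hot rows, consulted before table reads
        db.setRowCache(new LRUCache(64L * Bytes.MB));
    }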
public static final ConfigOption<Long> DELETE_OBSOLETE_FILE_PERIOD =
new ConfigOption<>(
"rocksdb.delete_obsolete_files_period",
@@ -280,11 +353,52 @@ public static synchronized RocksDBOptions instance() {
public static final ConfigOption<Integer> MAX_MEMTABLES_TO_MAINTAIN =
new ConfigOption<>(
"rocksdb.max_write_buffer_number_to_maintain",
- "The total maximum number of write buffers to maintain in memory.",
+ "The total maximum number of write buffers to maintain in memory " +
+ "for conflict checking when transactions are used.",
rangeInt(0, Integer.MAX_VALUE),
0
);
+ public static final ConfigOption<Double> MEMTABLE_BLOOM_SIZE_RATIO =
+ new ConfigOption<>(
+ "rocksdb.memtable_bloom_size_ratio",
+ "If prefix-extractor is set and memtable_bloom_size_ratio " +
+ "is not 0, or if memtable_whole_key_filtering is set true, " +
+ "create bloom filter for memtable with the size of " +
+ "write_buffer_size * memtable_bloom_size_ratio. " +
+ "If it is larger than 0.25, it is santinized to 0.25.",
+ rangeDouble(0.0, 1.0),
+ 0.0
+ );
+
+ public static final ConfigOption<Boolean> MEMTABLE_BLOOM_WHOLE_KEY_FILTERING =
+ new ConfigOption<>(
+ "rocksdb.memtable_whole_key_filtering",
+ "Enable whole key bloom filter in memtable, it can " +
+ "potentially reduce CPU usage for point-look-ups. Note " +
+ "this will only take effect if memtable_bloom_size_ratio > 0.",
+ disallowEmpty(),
+ false
+ );
+
+ public static final ConfigOption<Long> MEMTABLE_BLOOM_HUGE_PAGE_SIZE =
+ new ConfigOption<>(
+ "rocksdb.memtable_huge_page_size",
+ "The page size for huge page TLB for bloom in memtable. " +
+ "If <= 0, not allocate from huge page TLB but from malloc.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 0L
+ );
+
+ public static final ConfigOption<Boolean> MEMTABLE_INPLACE_UPDATE_SUPPORT =
+ new ConfigOption<>(
+ "rocksdb.inplace_update_support",
+ "Allows thread-safe inplace updates if a put key exists " +
+ "in current memtable and sizeof new value is smaller.",
+ disallowEmpty(),
+ false
+ );
+
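For reference, a sketch of how the four memtable options above combine, mirroring the mcf.set* calls this patch adds in RocksDBStdSessions (assumes org.rocksdb.ColumnFamilyOptions is imported; the 0.1 ratio is illustrative):

    try (ColumnFamilyOptions cf = new ColumnFamilyOptions()) {
        // Bloom sized at write_buffer_size * ratio, built per memtable
        cf.setMemtablePrefixBloomSizeRatio(0.1);
        // Also filter whole keys, not only extracted prefixes
        cf.setMemtableWholeKeyFiltering(true);
        // <= 0 means allocate the bloom from malloc, not huge page TLB
        cf.setMemtableHugePageSize(0L);
        cf.setInplaceUpdateSupport(false);
    }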
public static final ConfigOption<Boolean> DYNAMIC_LEVEL_BYTES =
new ConfigOption<>(
"rocksdb.level_compaction_dynamic_level_bytes",
@@ -404,6 +518,89 @@ public static synchronized RocksDBOptions instance() {
false
);
+ public static final ConfigOption<Boolean> USE_FSYNC =
+ new ConfigOption<>(
+ "rocksdb.use_fsync",
+ "If true, then every store to stable storage will issue a fsync.",
+ disallowEmpty(),
+ false
+ );
+
+ public static final ConfigOption<Boolean> ATOMIC_FLUSH =
+ new ConfigOption<>(
+ "rocksdb.atomic_flush",
+ "If true, flushing multiple column families and committing " +
+ "their results atomically to MANIFEST. Note that it's not " +
+ "necessary to set atomic_flush=true if WAL is always enabled.",
+ disallowEmpty(),
+ false
+ );
+
+ public static final ConfigOption<Integer> TABLE_FORMAT_VERSION =
+ new ConfigOption<>(
+ "rocksdb.format_version",
+ "The format version of BlockBasedTable, allowed values are 0~5.",
+ rangeInt(0, 5),
+ 5
+ );
+
+ public static final ConfigConvOption<String, IndexType> INDEX_TYPE =
+ new ConfigConvOption<>(
+ "rocksdb.index_type",
+ "The index type used to lookup between data blocks " +
+ "with the sst table, allowed values are [kBinarySearch," +
+ "kHashSearch,kTwoLevelIndexSearch,kBinarySearchWithFirstKey].",
+ allowValues("kBinarySearch", "kHashSearch",
+ "kTwoLevelIndexSearch", "kBinarySearchWithFirstKey"),
+ IndexType::valueOf,
+ "kBinarySearch"
+ );
+
+ public static final ConfigConvOption<String, DataBlockIndexType> DATA_BLOCK_SEARCH_TYPE =
+ new ConfigConvOption<>(
+ "rocksdb.data_block_index_type",
+ "The search type used to point lookup in data block with " +
+ "the sst table, allowed values are [kDataBlockBinarySearch," +
+ "kDataBlockBinaryAndHash].",
+ allowValues("kDataBlockBinarySearch", "kDataBlockBinaryAndHash"),
+ DataBlockIndexType::valueOf,
+ "kDataBlockBinarySearch"
+ );
+
+ public static final ConfigOption<Double> DATA_BLOCK_HASH_TABLE_RATIO =
+ new ConfigOption<>(
+ "rocksdb.data_block_hash_table_util_ratio",
+ "The hash table utilization ratio value of entries/buckets. " +
+ "It is valid only when data_block_index_type=kDataBlockBinaryAndHash.",
+ rangeDouble(0.0, 1.0),
+ 0.75
+ );
+
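For reference, a sketch of enabling the data-block hash index these two options describe, mirroring initTableConfig() in this patch:

    BlockBasedTableConfig table = new BlockBasedTableConfig();
    // In-block hash index for point lookups; a lookup that misses the
    // hash falls back to the normal binary search within the block
    table.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash);
    table.setDataBlockHashTableUtilRatio(0.75);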
+ public static final ConfigOption<Long> BLOCK_SIZE =
+ new ConfigOption<>(
+ "rocksdb.block_size",
+ "Approximate size of user data packed per block, Note that " +
+ "it corresponds to uncompressed data.",
+ rangeInt(0L, Long.MAX_VALUE),
+ 4L * Bytes.KB
+ );
+
+ public static final ConfigOption<Integer> BLOCK_SIZE_DEVIATION =
+ new ConfigOption<>(
+ "rocksdb.block_size_deviation",
+ "The percentage of free space used to close a block.",
+ rangeInt(0, 100),
+ 10
+ );
+
+ public static final ConfigOption<Integer> BLOCK_RESTART_INTERVAL =
+ new ConfigOption<>(
+ "rocksdb.block_restart_interval",
+ "The block restart interval for delta encoding in blocks.",
+ rangeInt(0, Integer.MAX_VALUE),
+ 16
+ );
+
public static final ConfigOption<Long> BLOCK_CACHE_CAPACITY =
new ConfigOption<>(
"rocksdb.block_cache_capacity",
@@ -413,28 +610,31 @@ public static synchronized RocksDBOptions instance() {
8L * Bytes.MB
);
- public static final ConfigOption<Boolean> PIN_L0_FILTER_AND_INDEX_IN_CACHE =
+ public static final ConfigOption<Boolean> CACHE_FILTER_AND_INDEX =
new ConfigOption<>(
- "rocksdb.pin_l0_filter_and_index_blocks_in_cache",
- "Indicating if we'd put index/filter blocks to the block cache.",
+ "rocksdb.cache_index_and_filter_blocks",
+ "Set this option true if we'd put index/filter blocks to " +
+ "the block cache.",
disallowEmpty(),
- false
+ true
);
- public static final ConfigOption<Boolean> PUT_FILTER_AND_INDEX_IN_CACHE =
+ public static final ConfigOption<Boolean> PIN_L0_INDEX_AND_FILTER =
new ConfigOption<>(
- "rocksdb.cache_index_and_filter_blocks",
- "Indicating if we'd put index/filter blocks to the block cache.",
+ "rocksdb.pin_l0_filter_and_index_blocks_in_cache",
+ "Set this option true if we'd pin L0 index/filter blocks to " +
+ "the block cache.",
disallowEmpty(),
- false
+ true
);
public static final ConfigOption<Integer> BLOOM_FILTER_BITS_PER_KEY =
new ConfigOption<>(
"rocksdb.bloom_filter_bits_per_key",
"The bits per key in bloom filter, a good value is 10, " +
- "which yields a filter with ~ 1% false positive rate, " +
- "-1 means no bloom filter.",
+ "which yields a filter with ~ 1% false positive rate. " +
+ "Set bloom_filter_bits_per_key > 0 to enable bloom filter, " +
+ "-1 means no bloom filter (0~0.5 round down to no filter).",
rangeInt(-1, Integer.MAX_VALUE),
-1
);
@@ -442,7 +642,8 @@ public static synchronized RocksDBOptions instance() {
public static final ConfigOption<Boolean> BLOOM_FILTER_MODE =
new ConfigOption<>(
"rocksdb.bloom_filter_block_based_mode",
- "Use block based filter rather than full filter.",
+ "If bloom filter is enabled, set this option true to " +
+ "use block based filter rather than full filter.",
disallowEmpty(),
false
);
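For reference, a sketch of how the bloom filter options combine, mirroring initTableConfig() in this patch; 10 bits/key is the recommended value from the description above:

    BlockBasedTableConfig table = new BlockBasedTableConfig();
    int bitsPerKey = 10;          // rocksdb.bloom_filter_bits_per_key
    boolean blockBased = false;   // rocksdb.bloom_filter_block_based_mode
    if (bitsPerKey >= 0) {
        // Full (per-sst) filter by default, block based when the flag is set
        table.setFilterPolicy(new BloomFilter(bitsPerKey, blockBased));
    }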
@@ -450,8 +651,9 @@ public static synchronized RocksDBOptions instance() {
public static final ConfigOption<Boolean> BLOOM_FILTER_WHOLE_KEY =
new ConfigOption<>(
"rocksdb.bloom_filter_whole_key_filtering",
- "True if place whole keys in the bloom filter, " +
- "else place the prefix of keys.",
+ "If bloom filter is enabled, set this option true to " +
+ "place whole keys in the bloom filter, else place the " +
+ "prefix of keys when prefix-extractor is set.",
disallowEmpty(),
true
);
@@ -459,8 +661,41 @@ public static synchronized RocksDBOptions instance() {
public static final ConfigOption<Boolean> BLOOM_FILTERS_SKIP_LAST_LEVEL =
new ConfigOption<>(
"rocksdb.optimize_filters_for_hits",
- "This flag allows us to not store filters for the last level.",
+ "If bloom filter is enabled, this flag allows us to not " +
+ "store filters for the last level. set this option true to " +
+ "optimize the filters mainly for cases where keys are found " +
+ "rather than also optimize for keys missed.",
+ disallowEmpty(),
+ true
+ );
+
+ public static final ConfigOption<Boolean> PARTITION_FILTERS_INDEXES =
+ new ConfigOption<>(
+ "rocksdb.partition_filters_and_indexes",
+ "If bloom filter is enabled, set this option true to use " +
+ "partitioned full filters and indexes for each sst file. " +
+ "This option is incompatible with block-based filters.",
disallowEmpty(),
false
);
+
+ public static final ConfigOption<Boolean> PIN_TOP_INDEX_AND_FILTER =
+ new ConfigOption<>(
+ "rocksdb.pin_top_level_index_and_filter",
+ "If partition_filters_and_indexes is set true, set this " +
+ "option true if we'd pin top-level index of partitioned " +
+ "filter and index blocks to the block cache.",
+ disallowEmpty(),
+ true
+ );
+
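For reference, the combination applied when rocksdb.partition_filters_and_indexes=true, mirroring initTableConfig() in this patch; the 4KB metadata block size is illustrative:

    BlockBasedTableConfig table = new BlockBasedTableConfig();
    // Partitioned filters require a two-level (partitioned) index too
    table.setPartitionFilters(true)
         .setIndexType(IndexType.kTwoLevelIndexSearch)
         .setMetadataBlockSize(4L * Bytes.KB)
         .setCacheIndexAndFilterBlocksWithHighPriority(true);
    // Pin the small top-level index; partitions themselves may be evicted
    table.setPinTopLevelIndexAndFilter(true);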
+ public static final ConfigOption<Integer> PREFIX_EXTRACTOR_CAPPED =
+ new ConfigOption<>(
+ "rocksdb.prefix_extractor_n_bytes",
+ "The prefix-extractor uses the first N bytes of a key as its prefix, " +
+ "it will use the full key when a key is shorter than the N. " +
+ "0 means unset prefix-extractor.",
+ rangeInt(0, Integer.MAX_VALUE),
+ 0
+ );
}
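For reference, a sketch of a prefix-aware scan once the capped prefix-extractor is enabled (rocksdb.prefix_extractor_n_bytes > 0); db, cfHandle, prefix and process are hypothetical placeholders:

    try (ReadOptions ro = new ReadOptions().setPrefixSameAsStart(true);
         RocksIterator it = db.newIterator(cfHandle, ro)) {
        // Visits only keys sharing the first N bytes with `prefix`
        for (it.seek(prefix); it.isValid(); it.next()) {
            process(it.key(), it.value());
        }
    }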
diff --git a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java
index 75766e5daf..06652152cd 100644
--- a/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java
+++ b/hugegraph-rocksdb/src/main/java/com/baidu/hugegraph/backend/store/rocksdb/RocksDBStdSessions.java
@@ -44,6 +44,7 @@
import org.rocksdb.DBOptions;
import org.rocksdb.DBOptionsInterface;
import org.rocksdb.Env;
+import org.rocksdb.IndexType;
import org.rocksdb.InfoLogLevel;
import org.rocksdb.LRUCache;
import org.rocksdb.MutableColumnFamilyOptionsInterface;
@@ -53,6 +54,7 @@
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;
import org.rocksdb.SstFileManager;
+import org.rocksdb.TableFormatConfig;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;
import org.slf4j.Logger;
@@ -475,6 +477,8 @@ public static void initOptions(HugeConfig conf,
* should we use options.setCreateMissingColumnFamilies()?
*/
db.setCreateIfMissing(true);
+ db.setWriteDbidToManifest(true);
+ db.setAvoidUnnecessaryBlockingIO(true);
// Optimize RocksDB
if (optimize) {
@@ -500,16 +504,30 @@ public static void initOptions(HugeConfig conf,
db.setUseDirectIoForFlushAndCompaction(
conf.get(RocksDBOptions.USE_DIRECT_READS_WRITES_FC));
+ db.setUseFsync(conf.get(RocksDBOptions.USE_FSYNC));
+
+ db.setAtomicFlush(conf.get(RocksDBOptions.ATOMIC_FLUSH));
+
db.setMaxManifestFileSize(
conf.get(RocksDBOptions.MAX_MANIFEST_FILE_SIZE));
db.setSkipStatsUpdateOnDbOpen(
conf.get(RocksDBOptions.SKIP_STATS_UPDATE_ON_DB_OPEN));
+ db.setSkipCheckingSstFileSizesOnDbOpen(
+ conf.get(RocksDBOptions.SKIP_CHECK_SIZE_ON_DB_OPEN));
db.setMaxFileOpeningThreads(
conf.get(RocksDBOptions.MAX_FILE_OPENING_THREADS));
db.setDbWriteBufferSize(conf.get(RocksDBOptions.DB_MEMTABLE_SIZE));
+
+ db.setLogReadaheadSize(conf.get(RocksDBOptions.LOG_READAHEAD_SIZE));
+
+ // A global cache for table-level rows
+ long cacheCapacity = conf.get(RocksDBOptions.ROW_CACHE_CAPACITY);
+ if (cacheCapacity > 0) {
+ db.setRowCache(new LRUCache(cacheCapacity));
+ }
}
if (mdb != null) {
@@ -527,6 +545,14 @@ public static void initOptions(HugeConfig conf,
mdb.setMaxTotalWalSize(conf.get(RocksDBOptions.MAX_TOTAL_WAL_SIZE));
+ mdb.setBytesPerSync(conf.get(RocksDBOptions.BYTES_PER_SYNC));
+ mdb.setWalBytesPerSync(conf.get(RocksDBOptions.WAL_BYTES_PER_SYNC));
+ mdb.setStrictBytesPerSync(
+ conf.get(RocksDBOptions.STRICT_BYTES_PER_SYNC));
+
+ mdb.setCompactionReadaheadSize(
+ conf.get(RocksDBOptions.COMPACTION_READAHEAD_SIZE));
+
mdb.setDeleteObsoleteFilesPeriodMicros(1000000 *
conf.get(RocksDBOptions.DELETE_OBSOLETE_FILE_PERIOD));
}
@@ -563,37 +589,23 @@ public static void initOptions(HugeConfig conf,
cf.setMaxWriteBufferNumberToMaintain(
conf.get(RocksDBOptions.MAX_MEMTABLES_TO_MAINTAIN));
+ cf.setInplaceUpdateSupport(
+ conf.get(RocksDBOptions.MEMTABLE_INPLACE_UPDATE_SUPPORT));
+
cf.setLevelCompactionDynamicLevelBytes(
conf.get(RocksDBOptions.DYNAMIC_LEVEL_BYTES));
- // https://github.com/facebook/rocksdb/wiki/Block-Cache
- BlockBasedTableConfig tableConfig = new BlockBasedTableConfig();
- long cacheCapacity = conf.get(RocksDBOptions.BLOCK_CACHE_CAPACITY);
- if (cacheCapacity <= 0L) {
- // Bypassing bug https://github.com/facebook/rocksdb/pull/5465
- tableConfig.setNoBlockCache(true);
- } else {
- tableConfig.setBlockCache(new LRUCache(cacheCapacity));
- }
- tableConfig.setPinL0FilterAndIndexBlocksInCache(
- conf.get(RocksDBOptions.PIN_L0_FILTER_AND_INDEX_IN_CACHE));
- tableConfig.setCacheIndexAndFilterBlocks(
- conf.get(RocksDBOptions.PUT_FILTER_AND_INDEX_IN_CACHE));
-
- // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
- int bitsPerKey = conf.get(RocksDBOptions.BLOOM_FILTER_BITS_PER_KEY);
- if (bitsPerKey >= 0) {
- boolean blockBased = conf.get(RocksDBOptions.BLOOM_FILTER_MODE);
- tableConfig.setFilterPolicy(new BloomFilter(bitsPerKey,
- blockBased));
- }
- tableConfig.setWholeKeyFiltering(
- conf.get(RocksDBOptions.BLOOM_FILTER_WHOLE_KEY));
- cf.setTableFormatConfig(tableConfig);
-
cf.setOptimizeFiltersForHits(
conf.get(RocksDBOptions.BLOOM_FILTERS_SKIP_LAST_LEVEL));
+ cf.setTableFormatConfig(initTableConfig(conf));
+
+ // CappedPrefixExtractor uses the first N bytes
+ int prefixLength = conf.get(RocksDBOptions.PREFIX_EXTRACTOR_CAPPED);
+ if (prefixLength > 0) {
+ cf.useCappedPrefixExtractor(prefixLength);
+ }
+
// https://github.com/facebook/rocksdb/tree/master/utilities/merge_operators
cf.setMergeOperatorName("uint64add"); // uint64add/stringappend
}
@@ -626,6 +638,20 @@ public static void initOptions(HugeConfig conf,
mcf.setHardPendingCompactionBytesLimit(
conf.get(RocksDBOptions.HARD_PENDING_COMPACTION_LIMIT));
+ /*
+ * TODO: also set memtable options:
+ * memtable_insert_with_hint_prefix_extractor
+ * The reason why the option is named `memtable_bloom_size_ratio`:
+ * https://github.com/facebook/rocksdb/pull/9453/files
+ * #diff-cde52d1fcbcce2bc6aae27838f1d3e7e9e469ccad8aaf8f2695f939e279d7501R369
+ */
+ mcf.setMemtablePrefixBloomSizeRatio(
+ conf.get(RocksDBOptions.MEMTABLE_BLOOM_SIZE_RATIO));
+ mcf.setMemtableWholeKeyFiltering(
+ conf.get(RocksDBOptions.MEMTABLE_BLOOM_WHOLE_KEY_FILTERING));
+ mcf.setMemtableHugePageSize(
+ conf.get(RocksDBOptions.MEMTABLE_BLOOM_HUGE_PAGE_SIZE));
+
boolean bulkload = conf.get(RocksDBOptions.BULKLOAD_MODE);
if (bulkload) {
// Disable automatic compaction
@@ -645,6 +671,79 @@ public static void initOptions(HugeConfig conf,
}
}
+ public static TableFormatConfig initTableConfig(HugeConfig conf) {
+ BlockBasedTableConfig tableConfig = new BlockBasedTableConfig();
+
+ tableConfig.setFormatVersion(
+ conf.get(RocksDBOptions.TABLE_FORMAT_VERSION));
+
+ /*
+ * The index type used to lookup between data blocks:
+ * https://github.com/facebook/rocksdb/wiki/Index-Block-Format
+ *
+ * TODO: support more index options:
+ * tableConfig.setIndexShortening(IndexShorteningMode.kShortenSeparators);
+ * tableConfig.setEnableIndexCompression(true);
+ * tableConfig.setIndexBlockRestartInterval(1);
+ */
+ tableConfig.setIndexType(conf.get(RocksDBOptions.INDEX_TYPE));
+
+ /*
+ * The search type of point lookup can be BinarySearch or HashSearch:
+ * https://github.com/facebook/rocksdb/wiki/Data-Block-Hash-Index
+ */
+ tableConfig.setDataBlockIndexType(
+ conf.get(RocksDBOptions.DATA_BLOCK_SEARCH_TYPE));
+ tableConfig.setDataBlockHashTableUtilRatio(
+ conf.get(RocksDBOptions.DATA_BLOCK_HASH_TABLE_RATIO));
+
+ long blockSize = conf.get(RocksDBOptions.BLOCK_SIZE);
+ tableConfig.setBlockSize(blockSize);
+ tableConfig.setBlockSizeDeviation(
+ conf.get(RocksDBOptions.BLOCK_SIZE_DEVIATION));
+ tableConfig.setBlockRestartInterval(
+ conf.get(RocksDBOptions.BLOCK_RESTART_INTERVAL));
+
+ // https://github.com/facebook/rocksdb/wiki/Block-Cache
+ long cacheCapacity = conf.get(RocksDBOptions.BLOCK_CACHE_CAPACITY);
+ if (cacheCapacity <= 0L) {
+ // Bypassing bug https://github.com/facebook/rocksdb/pull/5465
+ tableConfig.setNoBlockCache(true);
+ } else {
+ tableConfig.setBlockCache(new LRUCache(cacheCapacity));
+ }
+
+ // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
+ int bitsPerKey = conf.get(RocksDBOptions.BLOOM_FILTER_BITS_PER_KEY);
+ if (bitsPerKey >= 0) {
+ // TODO: use space-saving RibbonFilterPolicy
+ boolean blockBased = conf.get(RocksDBOptions.BLOOM_FILTER_MODE);
+ tableConfig.setFilterPolicy(new BloomFilter(bitsPerKey,
+ blockBased));
+
+ tableConfig.setWholeKeyFiltering(
+ conf.get(RocksDBOptions.BLOOM_FILTER_WHOLE_KEY));
+
+ tableConfig.setCacheIndexAndFilterBlocks(
+ conf.get(RocksDBOptions.CACHE_FILTER_AND_INDEX));
+ tableConfig.setPinL0FilterAndIndexBlocksInCache(
+ conf.get(RocksDBOptions.PIN_L0_INDEX_AND_FILTER));
+
+ // https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
+ if (conf.get(RocksDBOptions.PARTITION_FILTERS_INDEXES)) {
+ // Enable partitioned indexes and partitioned filters
+ tableConfig.setPartitionFilters(true)
+ .setIndexType(IndexType.kTwoLevelIndexSearch)
+ .setMetadataBlockSize(blockSize)
+ .setCacheIndexAndFilterBlocksWithHighPriority(true);
+ tableConfig.setPinTopLevelIndexAndFilter(
+ conf.get(RocksDBOptions.PIN_TOP_INDEX_AND_FILTER));
+ }
+ }
+
+ return tableConfig;
+ }
+
public static final byte[] encode(String string) {
return StringEncoding.encode(string);
}
@@ -662,13 +761,13 @@ private final class StdSession extends RocksDBSessions.Session {
private WriteOptions writeOptions;
public StdSession(HugeConfig conf) {
- boolean raftMode = conf.get(CoreOptions.RAFT_MODE);
this.batch = new WriteBatch();
this.writeOptions = new WriteOptions();
/*
* When working under raft mode, if the store crashes, the state-machine
* can restore from snapshot + raft log, so wal and sync are not needed
*/
+ boolean raftMode = conf.get(CoreOptions.RAFT_MODE);
if (raftMode) {
this.writeOptions.setDisableWAL(true);
this.writeOptions.setSync(false);
@@ -938,9 +1037,13 @@ public BackendColumnIterator scan(String table, byte[] prefix) {
assert !this.hasChanges();
/*
* NOTE: Options.prefix_extractor is a prerequisite for
- * Options.setPrefixSameAsStart(true):
- * ReadOptions options = new ReadOptions();
- * options.setPrefixSameAsStart(true);
+ * optimized prefix seek: if Options.prefix_extractor is enabled,
+ * we can call setPrefixSameAsStart(true) or setAutoPrefixMode(true):
+ * ReadOptions options = new ReadOptions();
+ * options.setPrefixSameAsStart(true);
+ * or
+ * options.setAutoPrefixMode(true);
+ * options.setIterateUpperBound(prefix + 1);
*/
try (CFHandle cf = cf(table)) {
ReusedRocksIterator iter = cf.newIterator();
@@ -957,9 +1060,14 @@ public BackendColumnIterator scan(String table, byte[] keyFrom,
byte[] keyTo, int scanType) {
assert !this.hasChanges();
/*
- * Not sure if setTotalOrderSeek(true) must be set:
- * ReadOptions options = new ReadOptions();
- * options.setTotalOrderSeek(true);
+ * NOTE: if Options.prefix_extractor is enabled, we need to call
+ * setTotalOrderSeek(true) or setAutoPrefixMode(true) to make
+ * page-seek or shard-scan return correct results:
+ * ReadOptions options = new ReadOptions();
+ * options.setTotalOrderSeek(true);
+ * or
+ * options.setAutoPrefixMode(true);
+ * options.setIterateUpperBound(keyTo);
*/
try (CFHandle cf = cf(table)) {
ReusedRocksIterator iter = cf.newIterator();
@@ -1059,7 +1167,7 @@ private boolean match(int expected) {
@SuppressWarnings("unused")
private void dump() {
this.seek();
- LOG.info(">>>> scan from {}: {}{}",
+ LOG.info(">>>> scan from {}: {}{}",
this.table,
this.keyBegin == null ? "*" : StringEncoding.format(this.keyBegin),
this.iter.isValid() ? "" : " - No data");
@@ -1097,7 +1205,7 @@ public boolean hasNext() {
}
private void seek() {
- if (this.keyBegin == null) {
+ if (this.keyBegin == null || this.keyBegin.length <= 0) {
// Seek to the first if no `keyBegin`
this.iter.seekToFirst();
} else {
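For reference, a sketch of the total-order range scan described by the comment in scan(keyFrom, keyTo) above; db, cfHandle, keyFrom, keyTo, compare and process are hypothetical placeholders:

    // With a prefix-extractor set, total-order seek restores plain
    // byte-order iteration so the [keyFrom, keyTo) scan sees every key
    try (ReadOptions ro = new ReadOptions().setTotalOrderSeek(true);
         RocksIterator it = db.newIterator(cfHandle, ro)) {
        for (it.seek(keyFrom); it.isValid(); it.next()) {
            if (compare(it.key(), keyTo) >= 0) {
                break; // reached the exclusive upper bound
            }
            process(it.key(), it.value());
        }
    }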
diff --git a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/rocksdb/RocksDBSessionTest.java b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/rocksdb/RocksDBSessionTest.java
index 2e870d858e..3abc3c936e 100644
--- a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/rocksdb/RocksDBSessionTest.java
+++ b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/rocksdb/RocksDBSessionTest.java
@@ -519,9 +519,19 @@ public void testDeleteByRangeWithSignedBytes() throws RocksDBException {
byte[] value21 = getBytes("value-2-1");
session.put(TABLE, key21, value21);
- session.deleteRange(TABLE, new byte[]{1, -3}, new byte[]{1, 3});
this.commit();
+ // TODO: enable after rocksdb issue #8239 is fixed
+ /*
+ session.deleteRange(TABLE, new byte[]{1, -3}, new byte[]{1, 3});
+ Assert.assertThrows(BackendException.class, () -> {
+ this.commit();
+ }, e -> {
+ Assert.assertContains("end key comes before start key",
+ e.getCause().getMessage());
+ });
+ */
+
Assert.assertArrayEquals(value11, session.get(TABLE, key11));
Assert.assertArrayEquals(value12, session.get(TABLE, key12));
Assert.assertArrayEquals(value21, session.get(TABLE, key21));