etc/config-template.toml

## TiKV config template
##  Human-readable big numbers:
##   File size(based on byte, binary units): KB, MB, GB, TB, PB
##    e.g.: 1_048_576 = "1MB"
##   Time(based on ms): ms, s, m, h
##    e.g.: 78_000 = "1.3m"

## File to store slow logs.
## If "log-file" is set, but this is not set, the slow logs will be appeneded
## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs
## will be appended to stderr.
# slow-log-file = ""

## The minimum operation cost to output relative logs.
# slow-log-threshold = "1s"

## Enable io snoop which utilize eBPF to get accurate disk io of TiKV
## It won't take effect when compiling without BCC_IOSNOOP=1.
# enable-io-snoop = true

## Use abort when TiKV panic. By default TiKV will use _exit() on panic, in that case
## core dump file will not be generated, regardless of system settings.
## If this config is enabled, core dump files needs to be cleanup to avoid disk space
## being filled up.
# abort-on-panic = false

## Memory usage limit for the TiKV instance. Generally it's unnecessary to configure it
## explicitly, in which case it will be set to 75% of total available system memory.
## Considering the behavior of `block-cache.capacity`, it means 25% memory is reserved for
## OS page cache.
##
## It's still unnecessary to configure it for deploying multiple TiKV nodes on a single
## physical machine. It will be calculated as `5/3 * block-cache.capacity`.
##
## For different system memory capacity, the default memory quota will be:
## * system=8G    block-cache=3.6G    memory-usage-limit=6G   page-cache=2G.
## * system=16G   block-cache=7.2G    memory-usage-limit=12G  page-cache=4G
## * system=32G   block-cache=14.4G   memory-usage-limit=24G  page-cache=8G
##
## So how can `memory-usage-limit` influence TiKV? When a TiKV's memory usage almost reaches
## this threshold, it can squeeze some internal components (e.g. evicting cached Raft entries)
## to release memory.
# memory-usage-limit = "0B"

[quota]
## Quota is use to add some limitation for the read write flow and then
## gain predictable stable performance.
## CPU quota for these front requests can use, default value is 0, it means unlimited.
## The unit is millicpu but for now this config is approximate and soft limit.
# foreground-cpu-time = 0
## Write bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
# foreground-write-bandwidth = "0B"
## Read bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
# foreground-read-bandwidth = "0B"
## Limitation of max delay duration for each request, default value is 0 which means unlimited.
# max-delay-duration = "500ms"

[log]
## Log levels: debug, info, warn, error, fatal.
## Note that `debug` is only available in development builds.
# level = "info"
## log format, one of json, text. Default to text.
# format = "text"
## Enable automatic timestamps in log output, if not set, it will be defaulted to true.
# enable-timestamp = true

[log.file]
## Usually it is set through command line.
# filename = ""
## max log file size in MB (upper limit to 4096MB)
# max-size = 300
## max log file keep days
# max-days = 0
## maximum number of old log files to retain
# max-backups = 0

## Configurations for the single thread pool serving read requests.
[readpool.unified]
## The minimal working thread count of the thread pool.
# min-thread-count = 1

## The maximum working thread count of the thread pool.
## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
# max-thread-count = 4

## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"

## Max running tasks of each worker, reject if exceeded.
# max-tasks-per-worker = 2000

[readpool.storage]
## Whether to use the unified read pool to handle storage requests.
# use-unified-pool = true

## The following configurations only take effect when `use-unified-pool` is false.

## Size of the thread pool for high-priority operations.
# high-concurrency = 4

## Size of the thread pool for normal-priority operations.
# normal-concurrency = 4

## Size of the thread pool for low-priority operations.
# low-concurrency = 4

## Max running high-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-high = 2000

## Max running normal-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-normal = 2000

## Max running low-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-low = 2000

## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"

[readpool.coprocessor]
## Whether to use the unified read pool to handle coprocessor requests.
# use-unified-pool = true

## The following configurations only take effect when `use-unified-pool` is false.

## Most read requests from TiDB are sent to the coprocessor of TiKV. high/normal/low-concurrency is
## used to set the number of threads of the coprocessor.
## If there are many read requests, you can increase these config values (but keep it within the
## number of system CPU cores). For example, for a 32-core machine deployed with TiKV, you can even
## set these config to 30 in heavy read scenarios.
## If CPU_NUM > 8, the default thread pool size for coprocessors is set to CPU_NUM * 0.8.

# high-concurrency = 8
# normal-concurrency = 8
# low-concurrency = 8
# max-tasks-per-worker-high = 2000
# max-tasks-per-worker-normal = 2000
# max-tasks-per-worker-low = 2000

[server]
## Listening address.
# addr = "127.0.0.1:20160"

## Advertise listening address for client communication.
## If not set, `addr` will be used.
# advertise-addr = ""

## Status address.
## This is used for reporting the status of TiKV directly through 
## the HTTP address. Notice that there is a risk of leaking status
## information if this port is exposed to the public.
## Empty string means disabling it.
# status-addr = "127.0.0.1:20180"

## Set the maximum number of worker threads for the status report HTTP service.
# status-thread-pool-size = 1

## Compression type for gRPC channel: none, deflate or gzip.
# grpc-compression-type = "none"

## Size of the thread pool for the gRPC server.
# grpc-concurrency = 5

## The number of max concurrent streams/requests on a client connection.
# grpc-concurrent-stream = 1024

## Limit the memory size can be used by gRPC. Default is unlimited.
## gRPC usually works well to reclaim memory by itself. Limit the memory in case OOM
## is observed. Note that limit the usage can lead to potential stall.
# grpc-memory-pool-quota = "32G"

## The number of connections with each TiKV server to send Raft messages.
# grpc-raft-conn-num = 1

## Amount to read ahead on individual gRPC streams.
# grpc-stream-initial-window-size = "2MB"

## Time to wait before sending out a ping to check if server is still alive.
## This is only for communications between TiKV instances.
# grpc-keepalive-time = "10s"

## Time to wait before closing the connection without receiving KeepAlive ping Ack.
# grpc-keepalive-timeout = "3s"

## Set maximum message length in bytes that gRPC can send. `-1` means unlimited.
# max-grpc-send-msg-len = 10485760

## How many snapshots can be sent concurrently.
# concurrent-send-snap-limit = 32

## How many snapshots can be received concurrently.
# concurrent-recv-snap-limit = 32

## Max allowed recursion level when decoding Coprocessor DAG expression.
# end-point-recursion-limit = 1000

## Max time to handle Coprocessor requests before timeout.
# end-point-request-max-handle-duration = "60s"

## Max bytes that snapshot can be written to disk in one second.
## It should be set based on your disk performance.
# snap-max-write-bytes-per-sec = "100MB"

## Whether to enable request batch.
# enable-request-batch = true

## Attributes about this server, e.g. `{ zone = "us-west-1", disk = "ssd" }`.
# labels = {}

## The working thread count of the background pool, which include the endpoint of and br, split-check,
## region thread and other thread of delay-insensitive tasks.
## The default value is 2 if the number of CPU cores is less than 16, otherwise 3.
# background-thread-count = 2

## If handle time is larger than the threshold, it will print slow log in endpoint.
## The default value is 1s.
# end-point-slow-log-threshold = "1s"

[storage]
## The path to RocksDB directory.
# data-dir = "./"

## The number of slots in Scheduler latches, which controls write concurrency.
## In most cases you can use the default value. When importing data, you can set it to a larger
## value.
# scheduler-concurrency = 524288

## Scheduler's worker pool size, i.e. the number of write threads.
## It should be less than total CPU cores. When there are frequent write operations, set it to a
## higher value. More specifically, you can run `top -H -p tikv-pid` to check whether the threads
## named `sched-worker-pool` are busy.
# scheduler-worker-pool-size = 4

## When the pending write bytes exceeds this threshold, the "scheduler too busy" error is displayed.
# scheduler-pending-write-threshold = "100MB"

## For async commit transactions, it's possible to response to the client before applying prewrite
## requests. Enabling this can ease reduce latency when apply duration is significant, or reduce
## latency jittering when apply duration is not stable.
# enable-async-apply-prewrite = false

## Reserve some space to ensure recovering the store from `no space left` must succeed.
## `max(reserve-space, capacity * 5%)` will be reserved exactly.
##
## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
# reserve-space = "5GB"

## The maximum recovery time after rocksdb detects restorable background errors. When the data belonging
## to the data range is damaged, it will be reported to PD through heartbeat, and PD will add `remove-peer`
## operator to remove this damaged peer. When the damaged peer still exists in the current store, the
## corruption SST files remain, and the KV storage engine can still put new content normally, but it
## will return error when reading corrupt data range.
##
## If after this time, the peer where the corrupted data range located has not been removed from the
## current store, TiKV will panic.
##
## Set to 0 to disable this feature if you want to panic immediately when encountering such an error.
# background-error-recovery-window = "1h"

[storage.block-cache]
## Whether to create a shared block cache for all RocksDB column families.
##
## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read.
## It is recommended to turn on shared block cache. Since only the total cache size need to be
## set, it is easier to config. In most cases it should be able to auto-balance cache usage
## between column families with standard LRU algorithm.
##
## The rest of config in the storage.block-cache session is effective only when shared block cache
## is on.
# shared = true

## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory.
## When the config is not set, it is decided by the sum of the following fields or their default
## value:
##   * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
##   * rocksdb.writecf.block-cache-size   or 15% of system's total memory
##   * rocksdb.lockcf.block-cache-size    or  2% of system's total memory
##   * raftdb.defaultcf.block-cache-size  or  2% of system's total memory
##
## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly.
## Otherwise, the OOM problem might occur in TiKV.
##
## If it's not set, 45% of available system memory will be used.
# capacity = "1GB"

[storage.flow-control]
## Flow controller is used to throttle the write rate at scheduler level, aiming
## to substitute the write stall mechanism of RocksDB. It features in two points:
##   * throttle at scheduler, so raftstore and apply won't be blocked anymore
##   * better control on the throttle rate to avoid QPS drop under heavy write
##
## Support change dynamically. 
## When enabled, it disables kvdb's write stall and raftdb's write stall(except memtable) and vice versa.
# enable = true

## When the number of immutable memtables of kvdb reaches the threshold, the flow controller begins to work
# memtables-threshold = 5

## When the number of SST files of level-0 of kvdb reaches the threshold, the flow controller begins to work
# l0-files-threshold = 20

## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to 
## reject some write requests with `ServerIsBusy` error. 
# soft-pending-compaction-bytes-limit = "192GB"

## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to 
## reject all write requests with `ServerIsBusy` error. 
# hard-pending-compaction-bytes-limit = "1024GB"

[storage.io-rate-limit]
## Maximum I/O bytes that this server can write to or read from disk (determined by mode)
## in one second. Internally it prefers throttling background operations over foreground
## ones. This value should be set to the disk's optimal IO bandwidth, e.g. maximum IO
## bandwidth specified by cloud disk vendors.
##
## When set to zero, disk IO operations are not limited.
# max-bytes-per-sec = "0MB"

## Determine which types of IO operations are counted and restrained below threshold.
## Three different modes are: write-only, read-only, all-io.
##
## Only write-only mode is supported for now.
# mode = "write-only"

[pd]
## PD endpoints.
# endpoints = ["127.0.0.1:2379"]

## The interval at which to retry a PD connection initialization.
## Default is 300ms.
# retry-interval = "300ms"

## If the client observes an error, it can can skip reporting it except every `n` times.
## Set to 1 to disable this feature.
## Default is 10.
# retry-log-every = 10

## The maximum number of times to retry a PD connection initialization.
## Set to 0 to disable retry.
## Default is -1, meaning isize::MAX times.
# retry-max-count = -1

[raftstore]
## Whether to enable Raft prevote.
## Prevote minimizes disruption when a partitioned node rejoins the cluster by using a two phase
## election.
# prevote = true

## The path to RaftDB directory.
## If not set, it will be `{data-dir}/raft`.
## If there are multiple disks on the machine, storing the data of Raft RocksDB on a different disk
## can improve TiKV performance.
# raftdb-path = ""

## Store capacity, i.e. max data size allowed.
## If it is not set, disk capacity is used.
# capacity = 0

## Internal notify capacity.
## 40960 is suitable for about 7000 Regions. It is recommended to use the default value.
# notify-capacity = 40960

## Maximum number of internal messages to process in a tick.
# messages-per-tick = 4096

## Region heartbeat tick interval for reporting to PD.
# pd-heartbeat-tick-interval = "60s"

## Store heartbeat tick interval for reporting to PD.
# pd-store-heartbeat-tick-interval = "10s"

## The threshold of triggering Region split check.
## When Region size change exceeds this config, TiKV will check whether the Region should be split
## or not. To reduce the cost of scanning data in the checking process, you can set the value to
## 32MB during checking and set it back to the default value in normal operations.
# region-split-check-diff = "6MB"

## The interval of triggering Region split check.
# split-region-check-tick-interval = "10s"

## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry.
# raft-entry-max-size = "8MB"

## Interval to compact unnecessary Raft log.
# raft-log-compact-sync-interval = "2s"

## Interval to GC unnecessary Raft log.
# raft-log-gc-tick-interval = "3s"

## Threshold to GC stale Raft log, must be >= 1.
# raft-log-gc-threshold = 50

## When the entry count exceeds this value, GC will be forced to trigger.
# raft-log-gc-count-limit = 73728

## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger.
## It's recommanded to set it to 3/4 of `region-split-size`.
# raft-log-gc-size-limit = "72MB"

## Old Raft logs could be reserved if `raft_log_gc_threshold` is not reached.
## GC them after ticks `raft_log_reserve_max_ticks` times.
# raft_log_reserve_max_ticks = 6

## Raft engine is a replaceable component. For some implementations, it's necessary to purge
## old log files to recycle disk space ASAP.
# raft-engine-purge-interval = "10s"

## How long the peer will be considered down and reported to PD when it hasn't been active for this
## time.
# max-peer-down-duration = "10m"

## Interval to check whether to start manual compaction for a Region.
# region-compact-check-interval = "5m"

## Number of Regions for each time to check.
# region-compact-check-step = 100

## The minimum number of delete tombstones to trigger manual compaction.
# region-compact-min-tombstones = 10000

## The minimum percentage of delete tombstones to trigger manual compaction.
## It should be set between 1 and 100. Manual compaction is only triggered when the number of
## delete tombstones exceeds `region-compact-min-tombstones` and the percentage of delete tombstones
## exceeds `region-compact-tombstones-percent`.
# region-compact-tombstones-percent = 30

## Interval to check whether to start a manual compaction for Lock Column Family.
## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will
## trigger a manual compaction for Lock Column Family.
# lock-cf-compact-interval = "10m"
# lock-cf-compact-bytes-threshold = "256MB"

## Interval to check region whether the data is consistent.
# consistency-check-interval = "0s"

## Interval to clean up import SST files.
# cleanup-import-sst-interval = "10m"

## Use how many threads to handle log apply
# apply-pool-size = 2

## Use how many threads to handle raft messages
# store-pool-size = 2

## Use how many threads to handle raft io tasks
## If it is 0, it means io tasks are handled in store threads.
# store-io-pool-size = 0

## When the size of raft db writebatch exceeds this value, write will be triggered.
# raft-write-size-limit = "1MB"

## threads to generate raft snapshots
# snap-generator-pool-size = 2

[coprocessor]
## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region
## crosses tables.
## It is recommended to turn off this option if there will be a large number of tables created.
# split-region-on-table = false

## One split check produces several split keys in batch. This config limits the number of produced
## split keys in one batch.
# batch-split-limit = 10

## When Region [a,e) size exceeds `region_max_size`, it will be split into several Regions [a,b),
## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `region_split_size` (or a
## little larger).
# region-max-size = "144MB"
# region-split-size = "96MB"

## When the number of keys in Region [a,e) exceeds the `region_max_keys`, it will be split into
## several Regions [a,b), [b,c), [c,d), [d,e) and the number of keys in [a,b), [b,c), [c,d) will be
## `region_split_keys`.
# region-max-keys = 1440000
# region-split-keys = 960000

## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data.
# consistency-check-method = "mvcc"

[coprocessor-v2]
## Path to the directory where compiled coprocessor plugins are located.
## Plugins in this directory will be automatically loaded by TiKV.
## If the config value is not set, the coprocessor plugin will be disabled.
# coprocessor-plugin-directory = "./coprocessors"

[rocksdb]
## Maximum number of threads of RocksDB background jobs.
## The background tasks include compaction and flush. For detailed information why RocksDB needs to
## do compaction, see RocksDB-related materials.
## When write traffic (like the importing data size) is big, it is recommended to enable more
## threads. But set the number of the enabled threads smaller than that of CPU cores. For example,
## when importing data, for a machine with a 32-core CPU, set the value to 28.
## The default value is set to 8 or CPU_NUM - 1, whichever is smaller.
# max-background-jobs = 8

## Maximum number of threads of RocksDB background memtable flush jobs.
## The default value is set to 2 or max_background_jobs / 4, whichever is bigger.
# max-background-flushes = 2

## Represents the maximum number of threads that will concurrently perform a sub-compaction job by
## breaking it into multiple, smaller ones running simultaneously.
## The default value is set to 3 or the largest number to allow for two compactions, whichever is
## smaller.
# max-sub-compactions = 3

## Number of open files that can be used by the DB.
## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
## blocks into block cache at startup. So if your database has a large working set, it will take
## several minutes to open the DB. You may need to increase this if your database has a large
## working set. You can estimate the number of files based on `target-file-size-base` and
## `target_file_size_multiplier` for level-based compaction.
# max-open-files = 40960

## Max size of RocksDB's MANIFEST file.
## For detailed explanation, please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST
# max-manifest-file-size = "128MB"

## If the value is `true`, the database will be created if it is missing.
# create-if-missing = true

## RocksDB Write-Ahead Logs (WAL) recovery mode.
## "tolerate-corrupted-tail-records", tolerate incomplete record in trailing data on all logs;
## "absolute-consistency", We don't expect to find any corruption in the WAL;
## "point-in-time", Recover to point-in-time consistency;
## "skip-any-corrupted-records", Recovery after a disaster;
# wal-recovery-mode = "point-in-time"

## KV RocksDB WAL directory.
## This config specifies the absolute directory path for WAL.
## If it is not set, the log files will be in the same directory as data.
## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
## improve performance.
## Do not set this config the same as `raftdb.wal-dir`.
# wal-dir = ""

## The following two fields affect how archived WAL will be deleted.
## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
## 2. If `wal-ttl-seconds` is 0 and `wal-size-limit` is not 0, WAL files will be checked every 10
##    min and if total size is greater than `wal-size-limit`, they will be deleted starting with the
##    earliest until `wal-size-limit` is met. All empty files will be deleted.
## 3. If `wal-ttl-seconds` is not 0 and `wal-size-limit` is 0, then WAL files will be checked every
##    `wal-ttl-seconds / 2` and those that are older than `wal-ttl-seconds` will be deleted.
## 4. If both are not 0, WAL files will be checked every 10 min and both checks will be performed
##    with ttl being first.
## When you set the path to RocksDB directory in memory like in `/dev/shm`, you may want to set
## `wal-ttl-seconds` to a value greater than 0 (like 86400) and backup your DB on a regular basis.
## See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
# wal-ttl-seconds = 0
# wal-size-limit = 0

## Max RocksDB WAL size in total
# max-total-wal-size = "4GB"

## RocksDB Statistics provides cumulative stats over time.
## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
## know the internal status of RocksDB.
# enable-statistics = true

## Dump statistics periodically in information logs.
## Same as RocksDB's default value (10 min).
# stats-dump-period = "10m"

## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
## If you want to use RocksDB on multi disks or spinning disks, you should set value at least 2MB.
# compaction-readahead-size = 0

## Max buffer size that is used by WritableFileWrite.
# writable-file-max-buffer-size = "1MB"

## Use O_DIRECT for both reads and writes in background flush and compactions.
# use-direct-io-for-flush-and-compaction = false

## Limit the disk IO of compaction and flush.
## Compaction and flush can cause terrible spikes if they exceed a certain threshold. Consider
## setting this to 50% ~ 80% of the disk throughput for a more stable result. However, in heavy
## write workload, limiting compaction and flush speed can cause write stalls too.
## 1. rate-bytes-per-sec is the only parameter you want to set most of the time. It controls the
##    total write rate of compaction and flush in bytes per second. Currently, RocksDB does not
##    enforce rate limit for anything other than flush and compaction, e.g. write to WAL.
## 2. rate-limiter-refill-period controls how often IO tokens are refilled. Smaller value will flatten
##    IO bursts while introducing more CPU overhead.
## 3. rate-limiter-mode indicates which types of operations count against the limit.
##    "read-only"
##    "write-only"
##    "all-io"
## 4. rate-limiter-auto_tuned enables dynamic adjustment of rate limit within the range
##    [10MB/s, rate_bytes_per_sec], according to the recent demand for background I/O.
# rate-bytes-per-sec = "10GB"
# rate-limiter-refill-period = "100ms"
# rate-limiter-mode = "write-only"
# rate-limiter-auto-tuned = true

## Enable or disable the pipelined write. If set false, RocksDB will use a new write mode port from cockroachdb/pebble.
## See more details in https://github.com/tikv/rocksdb/pull/267 and https://github.com/tikv/tikv/issues/12059.
# enable-pipelined-write = false

## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
## in the background.
# bytes-per-sync = "1MB"

## Allows OS to incrementally sync WAL to disk while it is being written.
# wal-bytes-per-sync = "512KB"

## Specify the maximal size of the RocksDB info log file.
## If the log file is larger than this config, a new info log file will be created.
## If it is set to 0, all logs will be written to one log file.
# info-log-max-size = "1GB"

## Time for the RocksDB info log file to roll (in seconds).
## If the log file has been active longer than this config, it will be rolled.
## If it is set to 0, rolling will be disabled.
# info-log-roll-time = "0s"

## Maximal RocksDB info log files to be kept.
# info-log-keep-log-file-num = 10

## Specifies the RocksDB info log directory.
## If it is empty, the log files will be in the same directory as data.
## If it is not empty, the log files will be in the specified directory, and the DB data directory's
## absolute path will be used as the log file name's prefix.
# info-log-dir = ""

## RocksDB log levels
# info-log-level = "info"

## Options for `Titan`.
[rocksdb.titan]
## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once
## enabled, it can't fall back. Forced fallback may result in data loss.
## default: false
# enabled = false

## Maximum number of threads of `Titan` background gc jobs.
## default: 4
# max-background-gc = 4

## Options for "Default" Column Family, which stores actual user data.
[rocksdb.defaultcf]
## Compression method (if any) is used to compress a block.
##   no:     kNoCompression
##   snappy: kSnappyCompression
##   zlib:   kZlibCompression
##   bzip2:  kBZip2Compression
##   lz4:    kLZ4Compression
##   lz4hc:  kLZ4HCCompression
##   zstd:   kZSTD
## `lz4` is a compression algorithm with moderate speed and compression ratio. The compression
## ratio of `zlib` is high. It is friendly to the storage space, but its compression speed is
## slow. This compression occupies many CPU resources.

## Per level compression.
## This config should be chosen carefully according to CPU and I/O resources. For example, if you
## use the compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and find much I/O pressure of the
## system (run the `iostat` command to find %util lasts 100%, or run the `top` command to find many
## iowaits) when writing (importing) a lot of data while the CPU resources are adequate, you can
## compress level-0 and level-1 and exchange CPU resources for I/O resources. If you use the
## compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and you find the I/O pressure of the system is
## not big when writing a lot of data, but CPU resources are inadequate. Then run the `top` command
## and choose the `-H` option. If you find a lot of bg threads (namely the compression thread of
## RocksDB) are running, you can exchange I/O resources for CPU resources and change the compression
## mode to "no:no:no:lz4:lz4:zstd:zstd". In a word, it aims at making full use of the existing
## resources of the system and improving TiKV performance in terms of the current resources.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]

## Set zstd compression for the bottommost level.
## This config overrides compression-per-level. It use zstd for bottommost level to ensure
## consistent compression ratio, regardless of overall data size. If explicitly setting compression type
## for the bottommost level is not desired, "disable" should be used.
# bottommost-level-compression = "zstd"

## The data block size. RocksDB compresses data based on the unit of block.
## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that
## the block size specified here corresponds to uncompressed data.
# block-size = "64KB"

## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
## rate. Larger `bloom-filter-bits-per-key` values will reduce false positive rate, but increase
## memory usage and space amplification.
# bloom-filter-bits-per-key = 10

## `false` means one SST file one bloom filter, `true` means every block has a corresponding bloom
## filter.
# block-based-bloom-filter = false

# level0-file-num-compaction-trigger = 4

## Soft limit on number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-slowdown-writes-trigger`,
## RocksDB tries to slow down the write operation, because too many SST files of level-0 can cause
## higher read pressure of RocksDB.
# level0-slowdown-writes-trigger = 20

## Maximum number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
## RocksDB stalls the new write operation.
# level0-stop-writes-trigger = 20

## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
## sorted on-disk file. It is the RocksDB MemTable size.
# write-buffer-size = "128MB"

## The maximum number of the MemTables. The data written into RocksDB is first recorded in the WAL
## log, and then inserted into MemTables. When the MemTable reaches the size limit of
## `write-buffer-size`, it turns into read only and generates a new MemTable receiving new write
## operations. The flush threads of RocksDB will flush the read only MemTable to the disks to become
## an SST file of level0. `max-background-flushes` controls the maximum number of flush threads.
## When the flush threads are busy, resulting in the number of the MemTables waiting to be flushed
## to the disks reaching the limit of `max-write-buffer-number`, RocksDB stalls the new operation.
## "Stall" is a flow control mechanism of RocksDB. When importing data, you can set the
## `max-write-buffer-number` value higher, like 10.
# max-write-buffer-number = 5

## The minimum number of write buffers that will be merged together before writing to storage.
# min-write-buffer-number-to-merge = 1

## Control maximum total data size for base level (level 1).
## When the level-1 data size reaches the limit value of `max-bytes-for-level-base`, the SST files
## of level-1 and their overlap SST files of level-2 will be compacted. The golden rule: the first
## reference principle of setting `max-bytes-for-level-base` is guaranteeing that the
## `max-bytes-for-level-base` value is roughly equal to the data volume of level-0. Thus
## unnecessary compaction is reduced. For example, if the compression mode is
## "no:no:lz4:lz4:lz4:lz4:lz4", the `max-bytes-for-level-base` value can be `write-buffer-size * 4`,
## because there is no compression of level-0 and level-1 and the trigger condition of compaction
## for level-0 is that the number of the SST files reaches 4 (the default value). When both level-0
## and level-1 adopt compaction, it is necessary to analyze RocksDB logs to know the size of an SST
## file compressed from a MemTable. For example, if the file size is 32MB, the proposed value of
## `max-bytes-for-level-base` is 32MB * 4 = 128MB.
# max-bytes-for-level-base = "512MB"

## Target file size for compaction.
## The SST file size of level-0 is influenced by the compaction algorithm of `write-buffer-size`
## and level0. `target-file-size-base` is used to control the size of a single SST file of level1 to
## level6.
# target-file-size-base = "8MB"

## Max bytes for `compaction.max_compaction_bytes`.
## If it's necessary to enlarge value of this entry, it's better to also enlarge `reserve-space`
## in `storage` to ensure that a restarted TiKV instance can perform compactions successfully.
# max-compaction-bytes = "2GB"

## There are four different compaction priorities.
## "by-compensated-size"
## "oldest-largest-seq-first"
## "oldest-smallest-seq-first"
## "min-overlapping-ratio"
# compaction-pri = "min-overlapping-ratio"

## Refer to storage.flow-control.soft-pending-compaction-bytes-limit.
# soft-pending-compaction-bytes-limit = "192GB"

## Refer to storage.flow-control.hard-pending-compaction-bytes-limit.
# hard-pending-compaction-bytes-limit = "1000GB"

## Indicating if we'd put index/filter blocks to the block cache.
## If not specified, each "table reader" object will pre-load index/filter block during table
## initialization.
# cache-index-and-filter-blocks = true

## Pin level-0 filter and index blocks in cache.
# pin-l0-filter-and-index-blocks = true

## Enable read amplification statistics.
## value  =>  memory usage (percentage of loaded blocks memory)
## 1      =>  12.50 %
## 2      =>  06.25 %
## 4      =>  03.12 %
## 8      =>  01.56 %
## 16     =>  00.78 %
# read-amp-bytes-per-bit = 0

## Pick target size of each level dynamically.
# dynamic-level-bytes = true

## Optimizes bloom filters. If true, RocksDB won't create bloom filters for the max level of
## the LSM to reduce metadata that should fit in RAM.
## This value is setted to true for `default` cf by default because its kv data could be determined
## whether really exists by upper logic instead of bloom filters. But we suggest to set it to false
## while using `Raw` mode.
# optimize-filters-for-hits = true

## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries.
## The optimization can help reduce compaction IO, and allow us to use larger SST file size
## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on
## region migration.
## This config is available to default CF and write CF.
# enable-compaction-guard = true

## The lower bound of SST file size when compaction guard is enabled. The config prevent SST files
## being too small when compaction guard is enabled.
# compaction-guard-min-output-file-size = "8M"

## The upper bound of SST file size when compaction guard is enabled. The config prevent SST files
## being too large when compaction guard is enabled. This config overrides target-file-size-base
## for the same CF.
# compaction-guard-max-output-file-size = "128M"

## Options for "Default" Column Family for `Titan`.
[rocksdb.defaultcf.titan]
## The smallest value to store in blob files. Value smaller than
## this threshold will be inlined in base DB.
## default: 1KB
# min-blob-size = "1KB"

## The compression algorithm used to compress data in blob files.
## Compression method.
##   no:     kNoCompression
##   snappy: kSnappyCompression
##   zlib:   kZlibCompression
##   bzip2:  kBZip2Compression
##   lz4:    kLZ4Compression
##   lz4hc:  kLZ4HCCompression
##   zstd:   kZSTD
## default: lz4
# blob-file-compression = "lz4"

## Specifics cache size for blob records
## default: 0
# blob-cache-size = "0GB"

## If the ratio of discardable size of a blob file is larger than
## this threshold, the blob file will be GCed out.
## default: 0.5
# discardable-ratio = 0.5

## The mode used to process blob files. In read-only mode Titan
## stops writing value into blob log. In fallback mode Titan
## converts blob index into real value on flush and compaction.
## This option is especially useful for downgrading Titan.
##   default:   kNormal
##   read-only: kReadOnly
##   fallback:  kFallback
## default: normal
# blob-run-mode = "normal"

## If set true, values in blob file will be merged to a new blob file while
## their corresponding keys are compacted to last two level in LSM-Tree.
##
## With this feature enabled, Titan could get better scan performance, and
## better write performance during GC, but will suffer around 1.1 space
## amplification and 3 more write amplification if no GC needed (eg. uniformly
## distributed keys) under default rocksdb setting.
##
## Requirement: level_compaction_dynamic_level_base = true
## default: false
# level-merge = false

## Use merge operator to rewrite GC blob index.
## default: false
# gc-merge-rewrite = false

## Options for "Write" Column Family, which stores MVCC commit information
[rocksdb.writecf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"

## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1

## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"

# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = "min-overlapping-ratio"
# soft-pending-compaction-bytes-limit = "192GB"
# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = true
# compaction-guard-min-output-file-size = "8M"
# compaction-guard-max-output-file-size = "128M"

[rocksdb.lockcf]
# compression-per-level = ["no", "no", "no", "no", "no", "no", "no"]
# block-size = "16KB"
# write-buffer-size = "32MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
# max-bytes-for-level-base = "128MB"
# target-file-size-base = "8MB"
# level0-file-num-compaction-trigger = 1
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = "by-compensated-size"
# soft-pending-compaction-bytes-limit = "192GB"
# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = false

[raftdb]
# max-background-jobs = 4
# max-sub-compactions = 2
# max-open-files = 40960
# max-manifest-file-size = "20MB"
# create-if-missing = true

# enable-statistics = true
# stats-dump-period = "10m"

## Raft RocksDB WAL directory.
## This config specifies the absolute directory path for WAL.
## If it is not set, the log files will be in the same directory as data.
## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
## improve performance.
## Do not set this config the same as `rocksdb.wal-dir`.
# wal-dir = ""

# compaction-readahead-size = 0
# writable-file-max-buffer-size = "1MB"
# use-direct-io-for-flush-and-compaction = false
# enable-pipelined-write = true
# allow-concurrent-memtable-write = true
# bytes-per-sync = "1MB"
# wal-bytes-per-sync = "512KB"

# info-log-max-size = "1GB"
# info-log-roll-time = "0s"
# info-log-keep-log-file-num = 10
# info-log-dir = ""
# info-log-level = "info"

[raftdb.defaultcf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"

## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1

## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"

# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = "by-compensated-size"
# soft-pending-compaction-bytes-limit = "192GB"
# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = true
# enable-compaction-guard = false

[raft-engine]
## Determines whether to use Raft Engine to store raft logs. When it is
## enabled, configurations of `raftdb` are ignored.
# enable = true

## The directory at which raft log files are stored. If the directory does not
## exist, it will be created when TiKV is started.
##
## When this configuration is not set, `{data-dir}/raft-engine` is used.
##
## If there are multiple disks on your machine, it is recommended to store the
## data of Raft Engine on a different disk to improve TiKV performance.
# dir = ""

## Specifies the threshold size of a log batch. A log batch larger than this
## configuration is compressed.
##
## If you set this configuration item to `0`, compression is disabled.
# batch-compression-threshold = "8KB"

## Specifies the maximum accumulative size of buffered writes. When this
## configuration value is exceeded, buffered writes are flushed to the disk.
##
## If you set this configuration item to `0`, incremental sync is disabled.
# bytes-per-sync = "4MB"

## Specifies the maximum size of log files. When a log file is larger than this
## value, it is rotated.
# target-file-size = "128MB"

## Specifies the threshold size of the main log queue. When this configuration
## value is exceeded, the main log queue is purged.
##
## This configuration can be used to adjust the disk space usage of Raft
## Engine.
# purge-threshold = "10GB"

## Determines how to deal with file corruption during recovery.
##
## Candidates:
##   absolute-consistency
##   tolerate-tail-corruption
##   tolerate-any-corruption
# recovery-mode = "tolerate-tail-corruption"

## The minimum I/O size for reading log files during recovery.
##
## Default: "16KB". Minimum: "512B".
# recovery-read-block-size = "16KB"

## The number of threads used to scan and recover log files.
##
## Default: 4. Minimum: 1.
# recovery-threads = 4

## Memory usage limit for Raft Engine.
## When it's not set, 15% of available system memory will be used.
# memory-limit = "1GB"

[security]
## The path for TLS certificates. Empty string means disabling secure connections.
# ca-path = ""
# cert-path = ""
# key-path = ""
# cert-allowed-cn = []
#
## Avoid outputing data (e.g. user keys) to info log. It currently does not avoid printing
## user data altogether, but greatly reduce those logs.
## Default is false.
# redact-info-log = false

## Configurations for encryption at rest. Experimental.
[security.encryption]
## Encryption method to use for data files.
## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
## "plaintext" means encryption is enabled, in which case master key must be specified.
# data-encryption-method = "plaintext"

## Specifies how often TiKV rotates data encryption key.
# data-key-rotation-period = "7d"

## Enable an optimization to reduce IO and mutex contention for encryption metadata management.
## Once the option is turned on (which is the default after 4.0.9), the data format is not
## compatible with TiKV <= 4.0.8. In order to downgrade to TiKV <= 4.0.8, one can turn off this
## option and restart TiKV, after which TiKV will convert the data format to be compatible with
## previous versions.
# enable-file-dictionary-log = true

## Specifies master key if encryption is enabled. There are three types of master key:
##
##   * "plaintext":
##
##     Plaintext as master key means no master key is given and only applicable when
##     encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
##     have sub-config items. Example:
##     
##     [security.encryption.master-key]
##     type = "plaintext"
##
##   * "kms":
##
##     Use a KMS service to supply master key. Currently only AWS KMS is supported. This type of
##     master key is recommended for production use. Example:
##
##     [security.encryption.master-key]
##     type = "kms"
##     ## KMS CMK key id. Must be a valid KMS CMK where the TiKV process has access to.
##     ## In production is recommended to grant access of the CMK to TiKV using IAM.
##     key-id = "1234abcd-12ab-34cd-56ef-1234567890ab"
##     ## AWS region of the KMS CMK.
##     region = "us-west-2"
##     ## (Optional) AWS KMS service endpoint. Only required when non-default KMS endpoint is
##     ## desired.
##     endpoint = "https://kms.us-west-2.amazonaws.com"
##
##   * "file":
##
##     Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
##     as it breaks the purpose of encryption at rest, unless the file is stored in tempfs.
##     The file must contain a 256-bits (32 bytes, regardless of key length implied by 
##     data-encryption-method) key encoded as hex string and end with newline ("\n"). Example:
##
##     [security.encryption.master-key]
##     type = "file"
##     path = "/path/to/master/key/file"
##
# [security.encryption.master-key]
# type = "plaintext"

## Specifies the old master key when rotating master key. Same config format as master-key.
## The key is only access once during TiKV startup, after that TiKV do not need access to the key.
## And it is okay to leave the stale previous-master-key config after master key rotation.
# [security.encryption.previous-master-key]
# type = "plaintext"

[import]
## Number of threads to handle RPC requests.
# num-threads = 8

## Stream channel window size, stream will be blocked on channel full.
# stream-channel-window = 128

[backup]
## Number of threads to perform backup tasks.
## The default value is set to min(CPU_NUM * 0.5, 8).
# num-threads = 8

## Number of ranges to backup in one batch.
# batch-size = 8

## When Backup region [a,e) size exceeds `sst-max-size`, it will be backuped into several Files [a,b),
## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `sst-max-size` (or a
## little larger).
# sst-max-size = "144MB"

## Automatically reduce the number of backup threads when the current workload is high,
## in order to reduce impact on the cluster's performance during back up.
# enable-auto-tune = true 

[log-backup] 
## Number of threads to perform backup stream tasks.
## The default value is set to min(CPU_NUM * 0.5, 8).
# num-threads = 8

## enable this feature. TiKV will starts watch related tasks in PD. and backup kv changes to storage accodring to task.
## The default value is false.
# enable = false 

[backup.hadoop]
## let TiKV know how to find the hdfs shell command.
## Equivalent to the $HADOOP_HOME enviroment variable.
# home = ""

## TiKV will run the hdfs shell command under this linux user.
## TiKV will use the current linux user if not provided.
# linux-user = ""

[pessimistic-txn]
## The default and maximum delay before responding to TiDB when pessimistic
## transactions encounter locks
# wait-for-lock-timeout = "1s"

## If more than one transaction is waiting for the same lock, only the one with smallest
## start timestamp will be waked up immediately when the lock is released. Others will
## be waked up after `wake_up_delay_duration` to reduce contention and make the oldest
## one more likely acquires the lock.
# wake-up-delay-duration = "20ms"

## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions.
## Enabling this will improve performance, but slightly increase the transaction failure rate
# pipelined = true

## Enable in-memory pessimistic lock, only effect when processing perssimistic transactions.
## Enabling this will improve performance, but slightly increase the transaction failure rate.
## It only takes effect when `pessimistic-txn.pipelined` is also set to true.
# in-memory = true

[gc]
## The number of keys to GC in one batch.
# batch-keys = 512

## Max bytes that GC worker can write to rocksdb in one second.
## If it is set to 0, there is no limit.
# max-write-bytes-per-sec = "0"

## Enable GC by compaction filter or not.
# enable-compaction-filter = true

## Garbage ratio threshold to trigger a GC.
# ratio-threshold = 1.1