From 54f97cb5dc2f71cdd3c43833e4d77ae67cb798d6 Mon Sep 17 00:00:00 2001 From: Martin Martinez Rivera Date: Mon, 24 Aug 2020 10:47:15 -0700 Subject: [PATCH] fix(Dgraph): Stop forcing RAM mode for the write-ahead log. (#6142) (#6260) This change also adds a way to set the table and value log loading modes for the w directory independently of the values for the p directory. Fixes DGRAPH-1898. (cherry picked from commit 5f5aa9c1ab7505122b6e394a8af24ce0d52d9823) --- dgraph/cmd/alpha/run.go | 46 +++++++++++++++++++++++++++++++++-------- worker/config.go | 8 +++++-- worker/server_state.go | 29 +++++++++++++++----------- 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/dgraph/cmd/alpha/run.go b/dgraph/cmd/alpha/run.go index 79680f95669..60e98d842ed 100644 --- a/dgraph/cmd/alpha/run.go +++ b/dgraph/cmd/alpha/run.go @@ -99,13 +99,17 @@ they form a Raft group and provide synchronous replication. flag.StringP("postings", "p", "p", "Directory to store posting lists.") // Options around how to set up Badger. - flag.String("badger.tables", "mmap", - "[ram, mmap, disk] Specifies how Badger LSM tree is stored. "+ - "Option sequence consume most to least RAM while providing best to worst read "+ - "performance respectively.") - flag.String("badger.vlog", "mmap", - "[mmap, disk] Specifies how Badger Value log is stored."+ - " mmap consumes more RAM, but provides better performance.") + flag.String("badger.tables", "mmap,mmap", + "[ram, mmap, disk] Specifies how Badger LSM tree is stored for the postings and "+ + "write-ahead directory. Option sequence consume most to least RAM while providing "+ + "best to worst read performance respectively. If you pass two values separated by a "+ + "comma, the first value will be used for the postings directory and the second for "+ + "the write-ahead log directory.") + flag.String("badger.vlog", "mmap,mmap", + "[mmap, disk] Specifies how Badger Value log is stored for the postings and write-ahead "+ + "log directory. mmap consumes more RAM, but provides better performance. If you pass "+ + "two values separated by a comma the first value will be used for the postings "+ + "directory and the second for the w directory.") flag.Int("badger.compression_level", 3, "The compression level for Badger. A higher value uses more resources.") flag.String("encryption_key_file", "", @@ -525,8 +529,6 @@ func run() { bindall = Alpha.Conf.GetBool("bindall") opts := worker.Options{ - BadgerTables: Alpha.Conf.GetString("badger.tables"), - BadgerVlog: Alpha.Conf.GetString("badger.vlog"), BadgerKeyFile: Alpha.Conf.GetString("encryption_key_file"), BadgerCompressionLevel: Alpha.Conf.GetInt("badger.compression_level"), @@ -543,6 +545,32 @@ func run() { glog.Fatalf("Cannot enable encryption: %s", x.ErrNotSupported) } + badgerTables := strings.Split(Alpha.Conf.GetString("badger.tables"), ",") + if len(badgerTables) != 1 && len(badgerTables) != 2 { + glog.Fatalf("Unable to read badger.tables options. Expected single value or two "+ + "comma-separated values. Got %s", Alpha.Conf.GetString("badger.tables")) + } + if len(badgerTables) == 1 { + opts.BadgerTables = badgerTables[0] + opts.BadgerWalTables = badgerTables[0] + } else { + opts.BadgerTables = badgerTables[0] + opts.BadgerWalTables = badgerTables[1] + } + + badgerVlog := strings.Split(Alpha.Conf.GetString("badger.vlog"), ",") + if len(badgerVlog) != 1 && len(badgerVlog) != 2 { + glog.Fatalf("Unable to read badger.vlog options. Expected single value or two "+ + "comma-separated values. Got %s", Alpha.Conf.GetString("badger.vlog")) + } + if len(badgerVlog) == 1 { + opts.BadgerVlog = badgerVlog[0] + opts.BadgerWalVlog = badgerVlog[0] + } else { + opts.BadgerVlog = badgerVlog[0] + opts.BadgerWalVlog = badgerVlog[1] + } + secretFile := Alpha.Conf.GetString("acl_secret_file") if secretFile != "" { hmacSecret, err := ioutil.ReadFile(secretFile) diff --git a/worker/config.go b/worker/config.go index 1b41d42c85f..f05e17a6a38 100644 --- a/worker/config.go +++ b/worker/config.go @@ -39,12 +39,16 @@ const ( type Options struct { // PostingDir is the path to the directory storing the postings.. PostingDir string - // BadgerTables is the name of the mode used to load the badger tables. + // BadgerTables is the name of the mode used to load the badger tables for the p directory. BadgerTables string - // BadgerVlog is the name of the mode used to load the badger value log. + // BadgerVlog is the name of the mode used to load the badger value log for the p directory. BadgerVlog string // BadgerKeyFile is the file containing the key used for encryption. Enterprise only feature. BadgerKeyFile string + // BadgerWalTables is the name of the mode used to load the badger tables for the w directory. + BadgerWalTables string + // BadgerWalVlog is the name of the mode used to load the badger value log for the w directory. + BadgerWalVlog string // BadgerCompressionLevel is the ZSTD compression level used by badger. A // higher value means more CPU intensive compression and better compression // ratio. diff --git a/worker/server_state.go b/worker/server_state.go index a2c4d8e2d0c..5f30a2760a4 100644 --- a/worker/server_state.go +++ b/worker/server_state.go @@ -65,7 +65,7 @@ func InitServerState() { x.WorkerConfig.ProposedGroupId = groupId } -func setBadgerOptions(opt badger.Options) badger.Options { +func setBadgerOptions(opt badger.Options, wal bool) badger.Options { opt = opt.WithSyncWrites(false). WithTruncate(true). WithLogger(&x.ToGlog{}). @@ -89,8 +89,20 @@ func setBadgerOptions(opt badger.Options) badger.Options { opt.ZSTDCompressionLevel = Config.BadgerCompressionLevel } + var badgerTables string + var badgerVlog string + if wal { + // Settings for the write-ahead log. + badgerTables = Config.BadgerWalTables + badgerVlog = Config.BadgerWalVlog + } else { + // Settings for the data directory. + badgerTables = Config.BadgerTables + badgerVlog = Config.BadgerVlog + } + glog.Infof("Setting Badger table load option: %s", Config.BadgerTables) - switch Config.BadgerTables { + switch badgerTables { case "mmap": opt.TableLoadingMode = options.MemoryMap case "ram": @@ -102,7 +114,7 @@ func setBadgerOptions(opt badger.Options) badger.Options { } glog.Infof("Setting Badger value log load option: %s", Config.BadgerVlog) - switch Config.BadgerVlog { + switch badgerVlog { case "mmap": opt.ValueLogLoadingMode = options.MemoryMap case "disk": @@ -131,17 +143,10 @@ func (s *ServerState) initStorage() { // Write Ahead Log directory x.Checkf(os.MkdirAll(Config.WALDir, 0700), "Error while creating WAL dir.") opt := badger.LSMOnlyOptions(Config.WALDir) - opt = setBadgerOptions(opt) + opt = setBadgerOptions(opt, true) opt.ValueLogMaxEntries = 10000 // Allow for easy space reclamation. opt.MaxCacheSize = 10 << 20 // 10 mb of cache size for WAL. - // We should always force load LSM tables to memory, disregarding user settings, because - // Raft.Advance hits the WAL many times. If the tables are not in memory, retrieval slows - // down way too much, causing cluster membership issues. Because of prefix compression and - // value separation provided by Badger, this is still better than using the memory based WAL - // storage provided by the Raft library. - opt.TableLoadingMode = options.LoadToRAM - // Print the options w/o exposing key. // TODO: Build a stringify interface in Badger options, which is used to print nicely here. key := opt.EncryptionKey @@ -164,7 +169,7 @@ func (s *ServerState) initStorage() { WithKeepBlockIndicesInCache(true). WithKeepBlocksInCache(true). WithMaxBfCacheSize(500 << 20) // 500 MB of bloom filter cache. - opt = setBadgerOptions(opt) + opt = setBadgerOptions(opt, false) // Print the options w/o exposing key. // TODO: Build a stringify interface in Badger options, which is used to print nicely here.