From f842c7698fc4758b14be208be6577267312989e7 Mon Sep 17 00:00:00 2001 From: Hansie Odendaal <39146854+hansieodendaal@users.noreply.github.com> Date: Tue, 26 Mar 2024 14:41:00 +0200 Subject: [PATCH] feat: add dynamic growth to lmdb (#6231) Description --- Added dynamic growth size to LMDB, where it will grow with the configured size as well as an optional shortfall size that could not be written. This is especially relevant during block sync of many full blocks where the output SMT has grown very large. Motivation and Context --- Block sync did not work with many full blocks. ```rust 2024-03-25 07:27:14.304496100 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 5 time(s) in this transaction) 2024-03-25 07:27:14.317099900 [lmdb] DEBUG (\node_05\esmeralda\data/base_node\db) LMDB MB, mapsize was grown from 1616 MB to 1632 MB, increased by 16 MB 2024-03-25 07:27:14.319843700 [c::bn::block_sync] WARN Chain storage error: DB transaction was too large (3 operations) 2024-03-25 07:27:14.319864400 [c::bn::block_sync] WARN Block sync failed: No more sync peers available: Block sync failed 2024-03-25 07:27:14.319969400 [c::cs::database] INFO Rewinding headers from height 6325 to 1627 ``` How Has This Been Tested? --- System-level archival sync-from-scratch test on esmeralda after a coin-split and transaction stress test. Before the fix, blocks #1544 to #1584. Multiple resizes for the same set of write operations were required, with the final one resulting in block sync failure. 
```rust 2024-03-25 07:19:35.346281600 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 1 time(s) in this transaction) 2024-03-25 07:19:35.654103900 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 2 time(s) in this transaction) 2024-03-25 07:19:35.952783600 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 3 time(s) in this transaction) 2024-03-25 07:19:41.198100900 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 1 time(s) in this transaction) 2024-03-25 07:19:41.519953900 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 2 time(s) in this transaction) 2024-03-25 07:19:41.827079500 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 3 time(s) in this transaction) 2024-03-25 07:19:42.136522700 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 4 time(s) in this transaction) 2024-03-25 07:20:29.331297000 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 1 time(s) in this transaction) 2024-03-25 07:20:29.755442600 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 2 time(s) in this transaction) 2024-03-25 07:20:30.119457000 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 3 time(s) in this transaction) 2024-03-25 07:20:30.491588200 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 4 time(s) in this transaction) 2024-03-25 07:20:30.868365300 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 5 time(s) in this transaction) ``` After the fix, blocks #1544 to #1584. Only a single resize each time for a set of write operations was required. ```rust 2024-03-25 16:40:28.814566400 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 1 time(s) in this transaction) 2024-03-25 16:42:05.167759000 [c::cs::lmdb_db::lmdb_db] INFO Database resize required (resized 1 time(s) in this transaction) ``` What process can a PR reviewer use to test or verify this change? 
--- Code review Breaking Changes --- - [x] None - [ ] Requires data directory on base node to be deleted - [ ] Requires hard fork - [ ] Other - Please specify --- base_layer/core/src/chain_storage/error.rs | 6 +-- .../core/src/chain_storage/lmdb_db/lmdb.rs | 6 +-- .../core/src/chain_storage/lmdb_db/lmdb_db.rs | 54 +++++++++++++++++-- .../storage/src/lmdb_store/store.rs | 22 +++----- 4 files changed, 63 insertions(+), 25 deletions(-) diff --git a/base_layer/core/src/chain_storage/error.rs b/base_layer/core/src/chain_storage/error.rs index f9551e336c..7261030149 100644 --- a/base_layer/core/src/chain_storage/error.rs +++ b/base_layer/core/src/chain_storage/error.rs @@ -118,7 +118,7 @@ pub enum ChainStorageError { #[error("Key {key} in {table_name} already exists")] KeyExists { table_name: &'static str, key: String }, #[error("Database resize required")] - DbResizeRequired, + DbResizeRequired(Option), #[error("DB transaction was too large ({0} operations)")] DbTransactionTooLarge(usize), #[error("DB needs to be resynced: {0}")] @@ -183,7 +183,7 @@ impl ChainStorageError { _err @ ChainStorageError::IoError(_) | _err @ ChainStorageError::CannotCalculateNonTipMmr(_) | _err @ ChainStorageError::KeyExists { .. 
} | - _err @ ChainStorageError::DbResizeRequired | + _err @ ChainStorageError::DbResizeRequired(_) | _err @ ChainStorageError::DbTransactionTooLarge(_) | _err @ ChainStorageError::DatabaseResyncRequired(_) | _err @ ChainStorageError::BlockError(_) | @@ -213,7 +213,7 @@ impl From for ChainStorageError { field: "", value: "".to_string(), }, - Code(error::MAP_FULL) => ChainStorageError::DbResizeRequired, + Code(error::MAP_FULL) => ChainStorageError::DbResizeRequired(None), _ => ChainStorageError::AccessError(err.to_string()), } } diff --git a/base_layer/core/src/chain_storage/lmdb_db/lmdb.rs b/base_layer/core/src/chain_storage/lmdb_db/lmdb.rs index ce660df04a..fb85cbb95f 100644 --- a/base_layer/core/src/chain_storage/lmdb_db/lmdb.rs +++ b/base_layer/core/src/chain_storage/lmdb_db/lmdb.rs @@ -86,7 +86,7 @@ where target: LOG_TARGET, "Could not insert {} bytes with key '{}' into '{}' ({:?})", val_buf.len(), to_hex(key.as_lmdb_bytes()), table_name, err ); - Err(ChainStorageError::DbResizeRequired) + Err(ChainStorageError::DbResizeRequired(Some(val_buf.len()))) }, Err(e) => { error!( @@ -116,7 +116,7 @@ where txn.access().put(db, key, &val_buf, put::Flags::empty()).map_err(|e| { if let lmdb_zero::Error::Code(code) = &e { if *code == lmdb_zero::error::MAP_FULL { - return ChainStorageError::DbResizeRequired; + return ChainStorageError::DbResizeRequired(Some(val_buf.len())); } } error!( @@ -137,7 +137,7 @@ where txn.access().put(db, key, &val_buf, put::Flags::empty()).map_err(|e| { if let lmdb_zero::Error::Code(code) = &e { if *code == lmdb_zero::error::MAP_FULL { - return ChainStorageError::DbResizeRequired; + return ChainStorageError::DbResizeRequired(Some(val_buf.len())); } } error!( diff --git a/base_layer/core/src/chain_storage/lmdb_db/lmdb_db.rs b/base_layer/core/src/chain_storage/lmdb_db/lmdb_db.rs index d88618acf6..a74cca1897 100644 --- a/base_layer/core/src/chain_storage/lmdb_db/lmdb_db.rs +++ b/base_layer/core/src/chain_storage/lmdb_db/lmdb_db.rs @@ -23,7 +23,15 @@ 
use std::{convert::TryFrom, fmt, fs, fs::File, ops::Deref, path::Path, sync::Arc, time::Instant}; use fs2::FileExt; -use lmdb_zero::{open, ConstTransaction, Database, Environment, ReadTransaction, WriteTransaction}; +use lmdb_zero::{ + open, + traits::AsLmdbBytes, + ConstTransaction, + Database, + Environment, + ReadTransaction, + WriteTransaction, +}; use log::*; use primitive_types::U256; use serde::{Deserialize, Serialize}; @@ -55,6 +63,7 @@ use crate::{ error::{ChainStorageError, OrNotFound}, lmdb_db::{ composite_key::{CompositeKey, InputKey, OutputKey}, + helpers::serialize, lmdb::{ fetch_db_entry_sizes, lmdb_clear, @@ -321,6 +330,21 @@ impl LMDBDatabase { fn apply_db_transaction(&mut self, txn: &DbTransaction) -> Result<(), ChainStorageError> { #[allow(clippy::enum_glob_use)] use WriteOperation::*; + + // Ensure there will be enough space in the database to insert the block before it is attempted; this is more + // efficient than relying on an error if the LMDB environment map size was reached with each component's insert + // operation, with cleanup, resize and re-try. This will also prevent block sync from stalling due to LMDB + // environment map size being reached. + if txn.operations().iter().any(|op| { + matches!(op, InsertOrphanBlock { .. }) || + matches!(op, InsertTipBlockBody { .. }) || + matches!(op, InsertChainOrphanBlock { .. 
}) + }) { + unsafe { + LMDBStore::resize_if_required(&self.env, &self.env_config)?; + } + } + let write_txn = self.write_transaction()?; for op in txn.operations() { trace!(target: LOG_TARGET, "[apply_db_transaction] WriteOperation: {}", op); @@ -1397,7 +1421,29 @@ impl LMDBDatabase { fn insert_tip_smt(&self, txn: &WriteTransaction<'_>, smt: &OutputSmt) -> Result<(), ChainStorageError> { let k = MetadataKey::TipSmt; - lmdb_replace(txn, &self.tip_utxo_smt, &k.as_u32(), smt) + + match lmdb_replace(txn, &self.tip_utxo_smt, &k.as_u32(), smt) { + Ok(_) => { + trace!( + "Inserted {} bytes with key '{}' into 'tip_utxo_smt' (size {})", + serialize(smt).unwrap_or_default().len(), + to_hex(k.as_u32().as_lmdb_bytes()), + smt.size() + ); + Ok(()) + }, + Err(e) => { + if let ChainStorageError::DbResizeRequired(Some(val)) = e { + trace!( + "Could NOT insert {} bytes with key '{}' into 'tip_utxo_smt' (size {})", + val, + to_hex(k.as_u32().as_lmdb_bytes()), + smt.size() + ); + } + Err(e) + }, + } } fn update_block_accumulated_data( @@ -1761,7 +1807,7 @@ impl BlockchainBackend for LMDBDatabase { return Ok(()); }, - Err(ChainStorageError::DbResizeRequired) => { + Err(ChainStorageError::DbResizeRequired(shortfall)) => { info!( target: LOG_TARGET, "Database resize required (resized {} time(s) in this transaction)", @@ -1772,7 +1818,7 @@ impl BlockchainBackend for LMDBDatabase { // BlockchainDatabase, so we know there are no other threads taking out LMDB transactions when this // is called. 
unsafe { - LMDBStore::resize(&self.env, &self.env_config)?; + LMDBStore::resize(&self.env, &self.env_config, shortfall)?; } }, Err(e) => { diff --git a/infrastructure/storage/src/lmdb_store/store.rs b/infrastructure/storage/src/lmdb_store/store.rs index 0756a9cc52..471de6a3a6 100644 --- a/infrastructure/storage/src/lmdb_store/store.rs +++ b/infrastructure/storage/src/lmdb_store/store.rs @@ -92,7 +92,7 @@ impl LMDBConfig { impl Default for LMDBConfig { fn default() -> Self { - Self::new_from_mb(16, 16, 4) + Self::new_from_mb(16, 16, 8) } } @@ -426,21 +426,13 @@ impl LMDBStore { let stat = env.stat()?; let size_used_bytes = stat.psize as usize * env_info.last_pgno; let size_left_bytes = env_info.mapsize - size_used_bytes; - debug!( - target: LOG_TARGET, - "Resize check: Used bytes: {}, Remaining bytes: {}", size_used_bytes, size_left_bytes - ); if size_left_bytes <= config.resize_threshold_bytes { - Self::resize(env, config)?; debug!( target: LOG_TARGET, - "({}) LMDB size used {:?} MB, environment space left {:?} MB, increased by {:?} MB", - env.path()?.to_str()?, - size_used_bytes / BYTES_PER_MB, - size_left_bytes / BYTES_PER_MB, - config.grow_size_bytes / BYTES_PER_MB, + "Resize required: Used bytes: {}, Remaining bytes: {}", size_used_bytes, size_left_bytes ); + Self::resize(env, config, None)?; } Ok(()) } @@ -452,10 +444,10 @@ impl LMDBStore { /// not check for this condition, the caller must ensure it explicitly. 
/// /// - pub unsafe fn resize(env: &Environment, config: &LMDBConfig) -> Result<(), LMDBError> { + pub unsafe fn resize(env: &Environment, config: &LMDBConfig, shortfall: Option) -> Result<(), LMDBError> { let env_info = env.info()?; let current_mapsize = env_info.mapsize; - env.set_mapsize(current_mapsize + config.grow_size_bytes)?; + env.set_mapsize(current_mapsize + config.grow_size_bytes + shortfall.unwrap_or_default())?; let env_info = env.info()?; let new_mapsize = env_info.mapsize; debug!( @@ -464,7 +456,7 @@ impl LMDBStore { env.path()?.to_str()?, current_mapsize / BYTES_PER_MB, new_mapsize / BYTES_PER_MB, - config.grow_size_bytes / BYTES_PER_MB, + (config.grow_size_bytes + shortfall.unwrap_or_default()) / BYTES_PER_MB, ); Ok(()) @@ -498,7 +490,7 @@ impl LMDBDatabase { "Failed to obtain write transaction because the database needs to be resized" ); unsafe { - LMDBStore::resize(&self.env, &self.env_config)?; + LMDBStore::resize(&self.env, &self.env_config, Some(value.len()))?; } }, Err(e) => return Err(e.into()),