From 619a525bc8f1098297259ddb296b4b5dee223944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Grze=C5=9Bkiewicz?= Date: Thu, 20 Jun 2024 18:38:37 +0200 Subject: [PATCH] feat(docs): Pruning and Snapshots recovery basic docs (#2265) Signed-off-by: tomg10 --- core/bin/external_node/src/config/mod.rs | 2 +- .../external-node/07_snapshots_recovery.md | 30 ++++++++++++++ docs/guides/external-node/08_pruning.md | 40 +++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 docs/guides/external-node/07_snapshots_recovery.md create mode 100644 docs/guides/external-node/08_pruning.md diff --git a/core/bin/external_node/src/config/mod.rs index 9cd6a758a25c..b47ae3f8886e 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -541,7 +541,7 @@ impl OptionalENConfig { } fn default_pruning_data_retention_sec() -> u64 { - 3_600 // 1 hour + 3_600 * 24 * 7 // 7 days } fn from_env() -> anyhow::Result { diff --git a/docs/guides/external-node/07_snapshots_recovery.md b/docs/guides/external-node/07_snapshots_recovery.md new file mode 100644 index 000000000000..94d279e358de --- /dev/null +++ b/docs/guides/external-node/07_snapshots_recovery.md @@ -0,0 +1,30 @@ +# Snapshots Recovery + +Instead of starting a node using DB snapshots, it's possible to configure it to start from a protocol-level snapshot. +This process is much faster and requires way less storage. The Postgres database of a mainnet node recovered from a snapshot +is only about 300GB. Without [_pruning_](08_pruning.md) enabled, the state will continuously grow by about 15GB per day. + +> [!NOTE] > +> Nodes recovered from a snapshot don't have any historical data from before the recovery! 
+ +## Configuration + +To enable snapshots-recovery on mainnet, you need to set environment variables: + +```yaml +EN_SNAPSHOTS_RECOVERY_ENABLED: 'true' +EN_SNAPSHOTS_OBJECT_STORE_BUCKET_BASE_URL: 'zksync-era-mainnet-external-node-snapshots' +EN_SNAPSHOTS_OBJECT_STORE_MODE: 'GCSAnonymousReadOnly' +``` + +For sepolia testnet, use: + +```yaml +EN_SNAPSHOTS_RECOVERY_ENABLED: 'true' +EN_SNAPSHOTS_OBJECT_STORE_BUCKET_BASE_URL: 'zksync-era-boojnet-external-node-snapshots' +EN_SNAPSHOTS_OBJECT_STORE_MODE: 'GCSAnonymousReadOnly' +``` + +For working examples of fully configured nodes recovering from snapshots, see the +[_docker compose examples_](docker-compose-examples) directory and the [_Quick Start_](00_quick_start.md) guide. diff --git a/docs/guides/external-node/08_pruning.md b/docs/guides/external-node/08_pruning.md new file mode 100644 index 000000000000..c7f834214ae7 --- /dev/null +++ b/docs/guides/external-node/08_pruning.md @@ -0,0 +1,40 @@ +# Pruning + +It is possible to configure a ZKsync Node to periodically remove all data from batches older than a configurable +threshold. Data is pruned both from Postgres and from the tree (RocksDB). + +> [!NOTE] > +> If you need a node with a data retention period of up to a few days, please set up a node from a +> [_snapshot_](07_snapshots_recovery.md) and wait for it to have enough data. Pruning an archival node can take an +> impractical amount of time. In the future we will be offering pre-pruned DB snapshots with a few months of data. + +## Configuration + +You can enable pruning by setting the environment variable + +```yaml +EN_PRUNING_ENABLED: 'true' +``` + +By default, it will keep history for 7 days. You can configure the retention period using: + +```yaml +EN_PRUNING_DATA_RETENTION_SEC: '259200' # 3 days +``` + +The data retention can be set to any value, but for mainnet, values under 21h will be ignored, as a batch can only be +pruned after it has been executed on Ethereum. 
+ +## Storage requirements for pruned nodes + +The storage requirements depend on how long you configure the node to retain data, but are roughly: + +- **40GB + ~5GB/day of retained data** of disk space needed on the machine that runs the node +- **300GB + ~15GB/day of retained data** of disk space for Postgres + +> [!NOTE] > +> When pruning an existing archival node, Postgres will be unable to reclaim disk space automatically. To reclaim disk +> space, you need to manually run VACUUM FULL, which requires an ACCESS EXCLUSIVE lock; you can read more about it in +> the [_postgres docs_](https://www.postgresql.org/docs/current/sql-vacuum.html)