From 619a525bc8f1098297259ddb296b4b5dee223944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Grze=C5=9Bkiewicz?= Date: Thu, 20 Jun 2024 18:38:37 +0200 Subject: [PATCH] feat(docs): Pruning and Snapshots recovery basic docs (#2265) Signed-off-by: tomg10 --- core/bin/external_node/src/config/mod.rs | 2 +- .../external-node/07_snapshots_recovery.md | 30 ++++++++++++++ docs/guides/external-node/08_pruning.md | 40 +++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 docs/guides/external-node/07_snapshots_recovery.md create mode 100644 docs/guides/external-node/08_pruning.md diff --git a/core/bin/external_node/src/config/mod.rs index 9cd6a758a25c..b47ae3f8886e 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -541,7 +541,7 @@ impl OptionalENConfig { } fn default_pruning_data_retention_sec() -> u64 { - 3_600 // 1 hour + 3_600 * 24 * 7 // 7 days } fn from_env() -> anyhow::Result { diff --git a/docs/guides/external-node/07_snapshots_recovery.md b/docs/guides/external-node/07_snapshots_recovery.md new file mode 100644 index 000000000000..94d279e358de --- /dev/null +++ b/docs/guides/external-node/07_snapshots_recovery.md @@ -0,0 +1,30 @@ +# Snapshots Recovery + +Instead of starting a node using DB snapshots, it's possible to configure it to start from a protocol-level snapshot. +This process is much faster and requires way less storage. The Postgres database of a mainnet node recovered from a snapshot +is only about 300GB. Without [_pruning_](08_pruning.md) enabled, the state will continuously grow by about 15GB per day. + +> [!NOTE] > +> Nodes recovered from a snapshot don't have any historical data from before the recovery! 
+ +## Configuration + +To enable snapshots-recovery on mainnet, you need to set environment variables: + +```yaml +EN_SNAPSHOTS_RECOVERY_ENABLED: 'true' +EN_SNAPSHOTS_OBJECT_STORE_BUCKET_BASE_URL: 'zksync-era-mainnet-external-node-snapshots' +EN_SNAPSHOTS_OBJECT_STORE_MODE: 'GCSAnonymousReadOnly' +``` + +For sepolia testnet, use: + +```yaml +EN_SNAPSHOTS_RECOVERY_ENABLED: 'true' +EN_SNAPSHOTS_OBJECT_STORE_BUCKET_BASE_URL: 'zksync-era-boojnet-external-node-snapshots' +EN_SNAPSHOTS_OBJECT_STORE_MODE: 'GCSAnonymousReadOnly' +``` + +For working examples of fully configured nodes recovering from snapshots, see the +[_docker compose examples_](docker-compose-examples) directory and the [_Quick Start_](00_quick_start.md) guide. diff --git a/docs/guides/external-node/08_pruning.md b/docs/guides/external-node/08_pruning.md new file mode 100644 index 000000000000..c7f834214ae7 --- /dev/null +++ b/docs/guides/external-node/08_pruning.md @@ -0,0 +1,40 @@ +# Pruning + +It is possible to configure a ZKsync Node to periodically remove all data from batches older than a configurable +threshold. Data is pruned both from Postgres and from the tree (RocksDB). + +> [!NOTE] > +> If you need a node with a data retention period of up to a few days, please set up a node from a +> [_snapshot_](07_snapshots_recovery.md) and wait for it to have enough data. Pruning an archival node can take an +> impractical amount of time. In the future we will be offering pre-pruned DB snapshots with a few months of data. + +## Configuration + +You can enable pruning by setting the environment variable + +```yaml +EN_PRUNING_ENABLED: 'true' +``` + +By default, it will keep history for 7 days. You can configure the retention period using: + +```yaml +EN_PRUNING_DATA_RETENTION_SEC: '259200' # 3 days +``` + +The data retention can be set to any value, but for mainnet, values under 21h will be ignored, as a batch can only be +pruned after it has been executed on Ethereum. 
+ +## Storage requirements for pruned nodes + +The storage requirements depend on how long you configure the node to retain data, but are roughly: + +- **40GB + ~5GB/day of retained data** of disk space needed on the machine that runs the node +- **300GB + ~15GB/day of retained data** of disk space for Postgres + +> [!NOTE] > +> When pruning an existing archival node, Postgres will be unable to reclaim disk space automatically. To reclaim disk +> space, you need to manually run VACUUM FULL, which requires an ACCESS EXCLUSIVE lock; you can read more about it in +> the [_postgres docs_](https://www.postgresql.org/docs/current/sql-vacuum.html)