From 636e4effd2a01b9d16624ec1e75346ca1c856a9d Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Thu, 22 Aug 2024 08:03:36 +0100 Subject: [PATCH] feat: track world state metrics (#8109) Extract relevant stats from world state: - sync duration (by synch type) - fork duration - db size (estimate) - individual tree sizes (ie. how many leaves are filled) --- .../aztec-node/src/aztec-node/server.ts | 4 +- .../composed/integration_l1_publisher.test.ts | 2 +- yarn-project/kv-store/src/interfaces/store.ts | 5 ++ yarn-project/kv-store/src/lmdb/store.ts | 12 ++++ .../prover-client/src/mocks/test_context.ts | 2 +- .../orchestrator_mixed_blocks_2.test.ts | 3 +- .../orchestrator_single_blocks.test.ts | 3 +- .../orchestrator_workflow.test.ts | 5 +- yarn-project/prover-node/src/factory.ts | 2 +- .../telemetry-client/src/attributes.ts | 2 + yarn-project/telemetry-client/src/metrics.ts | 5 ++ .../txe/src/txe_service/txe_service.ts | 3 +- yarn-project/world-state/package.json | 1 + .../world-state/src/synchronizer/factory.ts | 4 +- .../src/world-state-db/merkle_trees.ts | 33 +++++++--- .../world-state/src/world-state-db/metrics.ts | 64 +++++++++++++++++++ yarn-project/world-state/tsconfig.json | 3 + yarn-project/yarn.lock | 1 + 18 files changed, 134 insertions(+), 20 deletions(-) create mode 100644 yarn-project/world-state/src/world-state-db/metrics.ts diff --git a/yarn-project/aztec-node/src/aztec-node/server.ts b/yarn-project/aztec-node/src/aztec-node/server.ts index c7a4f0144fe..16091b8d8e4 100644 --- a/yarn-project/aztec-node/src/aztec-node/server.ts +++ b/yarn-project/aztec-node/src/aztec-node/server.ts @@ -153,7 +153,7 @@ export class AztecNodeService implements AztecNode { ); // now create the merkle trees and the world state synchronizer - const worldStateSynchronizer = await createWorldStateSynchronizer(config, store, archiver); + const worldStateSynchronizer = await createWorldStateSynchronizer(config, store, archiver, telemetry); // start both and wait for them to sync from the block source await Promise.all([p2pClient.start(), worldStateSynchronizer.start()]); @@ -723,7 +723,7 @@ export class AztecNodeService implements AztecNode { // Instantiate merkle trees so uncommitted updates by this simulation are local to it. // TODO we should be able to remove this after https://github.com/AztecProtocol/aztec-packages/issues/1869 // So simulation of public functions doesn't affect the merkle trees. - const merkleTrees = await MerkleTrees.new(this.merkleTreesDb, this.log); + const merkleTrees = await MerkleTrees.new(this.merkleTreesDb, new NoopTelemetryClient(), this.log); const publicProcessorFactory = new PublicProcessorFactory( merkleTrees.asLatest(), diff --git a/yarn-project/end-to-end/src/composed/integration_l1_publisher.test.ts b/yarn-project/end-to-end/src/composed/integration_l1_publisher.test.ts index 5516a198777..c43e5ef32b6 100644 --- a/yarn-project/end-to-end/src/composed/integration_l1_publisher.test.ts +++ b/yarn-project/end-to-end/src/composed/integration_l1_publisher.test.ts @@ -148,7 +148,7 @@ describe('L1Publisher integration', () => { }); const tmpStore = openTmpStore(); - builderDb = await MerkleTrees.new(tmpStore); + builderDb = await MerkleTrees.new(tmpStore, new NoopTelemetryClient()); blockSource = mock(); blockSource.getBlocks.mockResolvedValue([]); const worldStateConfig: WorldStateConfig = { diff --git a/yarn-project/kv-store/src/interfaces/store.ts b/yarn-project/kv-store/src/interfaces/store.ts index 36874f804c0..df37d45e0a6 100644 --- a/yarn-project/kv-store/src/interfaces/store.ts +++ b/yarn-project/kv-store/src/interfaces/store.ts @@ -68,4 +68,9 @@ export interface AztecKVStore { * Deletes the store */ delete(): Promise; + + /** + * Estimates the size of the store in bytes. + */ + estimateSize(): { bytes: number }; } diff --git a/yarn-project/kv-store/src/lmdb/store.ts b/yarn-project/kv-store/src/lmdb/store.ts index 885e49ce477..51358e2d596 100644 --- a/yarn-project/kv-store/src/lmdb/store.ts +++ b/yarn-project/kv-store/src/lmdb/store.ts @@ -154,4 +154,16 @@ export class AztecLmdbStore implements AztecKVStore { async delete() { await this.#rootDb.drop(); } + + estimateSize(): { bytes: number } { + const stats = this.#rootDb.getStats(); + // `mapSize` represents to total amount of memory currently being used by the database. + // since the database is mmap'd, this is a good estimate of the size of the database for now. + // http://www.lmdb.tech/doc/group__mdb.html#a4bde3c8b676457342cba2fe27aed5fbd + if ('mapSize' in stats && typeof stats.mapSize === 'number') { + return { bytes: stats.mapSize }; + } else { + return { bytes: 0 }; + } + } } diff --git a/yarn-project/prover-client/src/mocks/test_context.ts b/yarn-project/prover-client/src/mocks/test_context.ts index f1a20ccba0d..46b4f30a68a 100644 --- a/yarn-project/prover-client/src/mocks/test_context.ts +++ b/yarn-project/prover-client/src/mocks/test_context.ts @@ -68,8 +68,8 @@ export class TestContext { const publicContractsDB = mock(); const publicWorldStateDB = mock(); const publicKernel = new RealPublicKernelCircuitSimulator(new WASMSimulator()); - const actualDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest()); const telemetry = new NoopTelemetryClient(); + const actualDb = await MerkleTrees.new(openTmpStore(), telemetry).then(t => t.asLatest()); const processor = new PublicProcessor( actualDb, publicExecutor, diff --git a/yarn-project/prover-client/src/orchestrator/orchestrator_mixed_blocks_2.test.ts b/yarn-project/prover-client/src/orchestrator/orchestrator_mixed_blocks_2.test.ts index 838433006f1..98c72da2d0c 100644 --- a/yarn-project/prover-client/src/orchestrator/orchestrator_mixed_blocks_2.test.ts +++ b/yarn-project/prover-client/src/orchestrator/orchestrator_mixed_blocks_2.test.ts @@ -5,6 +5,7 @@ import { range } from '@aztec/foundation/array'; import { times } from '@aztec/foundation/collection'; import { createDebugLogger } from '@aztec/foundation/log'; import { openTmpStore } from '@aztec/kv-store/utils'; +import { NoopTelemetryClient } from '@aztec/telemetry-client/noop'; import { type MerkleTreeOperations, MerkleTrees } from '@aztec/world-state'; import { makeBloatedProcessedTx, updateExpectedTreesFromTxs } from '../mocks/fixtures.js'; @@ -18,7 +19,7 @@ describe('prover/orchestrator/mixed-blocks', () => { beforeEach(async () => { context = await TestContext.new(logger); - expectsDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest()); + expectsDb = await MerkleTrees.new(openTmpStore(), new NoopTelemetryClient()).then(t => t.asLatest()); }); afterEach(async () => { diff --git a/yarn-project/prover-client/src/orchestrator/orchestrator_single_blocks.test.ts b/yarn-project/prover-client/src/orchestrator/orchestrator_single_blocks.test.ts index 6afccf9312b..ed0c077c5c4 100644 --- a/yarn-project/prover-client/src/orchestrator/orchestrator_single_blocks.test.ts +++ b/yarn-project/prover-client/src/orchestrator/orchestrator_single_blocks.test.ts @@ -5,6 +5,7 @@ import { range } from '@aztec/foundation/array'; import { createDebugLogger } from '@aztec/foundation/log'; import { sleep } from '@aztec/foundation/sleep'; import { openTmpStore } from '@aztec/kv-store/utils'; +import { NoopTelemetryClient } from '@aztec/telemetry-client/noop'; import { type MerkleTreeOperations, MerkleTrees } from '@aztec/world-state'; import { makeBloatedProcessedTx, updateExpectedTreesFromTxs } from '../mocks/fixtures.js'; @@ -18,7 +19,7 @@ describe('prover/orchestrator/blocks', () => { beforeEach(async () => { context = await TestContext.new(logger); - expectsDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest()); + expectsDb = await MerkleTrees.new(openTmpStore(), new NoopTelemetryClient()).then(t => t.asLatest()); }); afterEach(async () => { diff --git a/yarn-project/prover-client/src/orchestrator/orchestrator_workflow.test.ts b/yarn-project/prover-client/src/orchestrator/orchestrator_workflow.test.ts index 3c34ff32dda..b49f7cddb13 100644 --- a/yarn-project/prover-client/src/orchestrator/orchestrator_workflow.test.ts +++ b/yarn-project/prover-client/src/orchestrator/orchestrator_workflow.test.ts @@ -23,9 +23,10 @@ describe('prover/orchestrator', () => { let mockProver: MockProxy; let actualDb: MerkleTreeOperations; beforeEach(async () => { - actualDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest()); + const telemetryClient = new NoopTelemetryClient(); + actualDb = await MerkleTrees.new(openTmpStore(), telemetryClient).then(t => t.asLatest()); mockProver = mock(); - orchestrator = new ProvingOrchestrator(actualDb, mockProver, new NoopTelemetryClient()); + orchestrator = new ProvingOrchestrator(actualDb, mockProver, telemetryClient); }); it('calls root parity circuit only when ready', async () => { diff --git a/yarn-project/prover-node/src/factory.ts b/yarn-project/prover-node/src/factory.ts index 2dbb9260d31..48045ef33da 100644 --- a/yarn-project/prover-node/src/factory.ts +++ b/yarn-project/prover-node/src/factory.ts @@ -35,7 +35,7 @@ export async function createProverNode( log.verbose(`Created archiver and synced to block ${await archiver.getBlockNumber()}`); const worldStateConfig = { ...config, worldStateProvenBlocksOnly: true }; - const worldStateSynchronizer = await createWorldStateSynchronizer(worldStateConfig, store, archiver); + const worldStateSynchronizer = await createWorldStateSynchronizer(worldStateConfig, store, archiver, telemetry); await worldStateSynchronizer.start(); const simulationProvider = await createSimulationProvider(config, log); diff --git a/yarn-project/telemetry-client/src/attributes.ts b/yarn-project/telemetry-client/src/attributes.ts index 728d2b0c73e..df65306f493 100644 --- a/yarn-project/telemetry-client/src/attributes.ts +++ b/yarn-project/telemetry-client/src/attributes.ts @@ -60,3 +60,5 @@ export const L1_TX_TYPE = 'aztec.l1.tx_type'; export const TX_PHASE_NAME = 'aztec.tx.phase_name'; /** The proving job type */ export const PROVING_JOB_TYPE = 'aztec.proving.job_type'; + +export const MERKLE_TREE_NAME = 'aztec.merkle_tree.name'; diff --git a/yarn-project/telemetry-client/src/metrics.ts b/yarn-project/telemetry-client/src/metrics.ts index 984862b425a..f8e25c83301 100644 --- a/yarn-project/telemetry-client/src/metrics.ts +++ b/yarn-project/telemetry-client/src/metrics.ts @@ -62,3 +62,8 @@ export const PROVING_ORCHESTRATOR_BASE_ROLLUP_INPUTS_DURATION = export const PROVING_QUEUE_JOB_SIZE = 'aztec.proving_queue.job_size'; export const PROVING_QUEUE_SIZE = 'aztec.proving_queue.size'; + +export const WORLD_STATE_FORK_DURATION = 'aztec.world_state.fork.duration'; +export const WORLD_STATE_SYNC_DURATION = 'aztec.world_state.sync.duration'; +export const WORLD_STATE_MERKLE_TREE_SIZE = 'aztec.world_state.merkle_tree_size'; +export const WORLD_STATE_DB_SIZE = 'aztec.world_state.db_size'; diff --git a/yarn-project/txe/src/txe_service/txe_service.ts b/yarn-project/txe/src/txe_service/txe_service.ts index 86985da26e7..a1699e7ac98 100644 --- a/yarn-project/txe/src/txe_service/txe_service.ts +++ b/yarn-project/txe/src/txe_service/txe_service.ts @@ -18,6 +18,7 @@ import { type Logger } from '@aztec/foundation/log'; import { KeyStore } from '@aztec/key-store'; import { openTmpStore } from '@aztec/kv-store/utils'; import { ExecutionNoteCache, PackedValuesCache, type TypedOracle } from '@aztec/simulator'; +import { NoopTelemetryClient } from '@aztec/telemetry-client/noop'; import { MerkleTrees } from '@aztec/world-state'; import { TXE } from '../oracle/txe_oracle.js'; @@ -38,7 +39,7 @@ export class TXEService { static async init(logger: Logger) { const store = openTmpStore(true); - const trees = await MerkleTrees.new(store, logger); + const trees = await MerkleTrees.new(store, new NoopTelemetryClient(), logger); const packedValuesCache = new PackedValuesCache(); const txHash = new Fr(1); // The txHash is used for computing the revertible nullifiers for non-revertible note hashes. It can be any value for testing. const noteCache = new ExecutionNoteCache(txHash); diff --git a/yarn-project/world-state/package.json b/yarn-project/world-state/package.json index 2f2b53a1cdf..e731926eca0 100644 --- a/yarn-project/world-state/package.json +++ b/yarn-project/world-state/package.json @@ -58,6 +58,7 @@ "@aztec/foundation": "workspace:^", "@aztec/kv-store": "workspace:^", "@aztec/merkle-tree": "workspace:^", + "@aztec/telemetry-client": "workspace:^", "@aztec/types": "workspace:^", "tslib": "^2.4.0" }, diff --git a/yarn-project/world-state/src/synchronizer/factory.ts b/yarn-project/world-state/src/synchronizer/factory.ts index ed49fe94f36..e2d37d4a542 100644 --- a/yarn-project/world-state/src/synchronizer/factory.ts +++ b/yarn-project/world-state/src/synchronizer/factory.ts @@ -1,5 +1,6 @@ import { type L1ToL2MessageSource, type L2BlockSource } from '@aztec/circuit-types'; import { type AztecKVStore } from '@aztec/kv-store'; +import { type TelemetryClient } from '@aztec/telemetry-client'; import { MerkleTrees } from '../world-state-db/merkle_trees.js'; import { type WorldStateConfig } from './config.js'; @@ -9,7 +10,8 @@ export async function createWorldStateSynchronizer( config: WorldStateConfig, store: AztecKVStore, l2BlockSource: L2BlockSource & L1ToL2MessageSource, + client: TelemetryClient, ) { - const merkleTrees = await MerkleTrees.new(store); + const merkleTrees = await MerkleTrees.new(store, client); return new ServerWorldStateSynchronizer(store, merkleTrees, l2BlockSource, config); } diff --git a/yarn-project/world-state/src/world-state-db/merkle_trees.ts b/yarn-project/world-state/src/world-state-db/merkle_trees.ts index 6f41f377f14..1c9bd5b6877 100644 --- a/yarn-project/world-state/src/world-state-db/merkle_trees.ts +++ b/yarn-project/world-state/src/world-state-db/merkle_trees.ts @@ -32,6 +32,7 @@ import { import { padArrayEnd } from '@aztec/foundation/collection'; import { type DebugLogger, createDebugLogger } from '@aztec/foundation/log'; import { SerialQueue } from '@aztec/foundation/queue'; +import { Timer, elapsed } from '@aztec/foundation/timer'; import { type IndexedTreeLeafPreimage } from '@aztec/foundation/trees'; import { type AztecKVStore, type AztecSingleton } from '@aztec/kv-store'; import { @@ -45,6 +46,7 @@ import { loadTree, newTree, } from '@aztec/merkle-tree'; +import { type TelemetryClient } from '@aztec/telemetry-client'; import { type Hasher } from '@aztec/types/interfaces'; import { @@ -55,6 +57,7 @@ import { } from './merkle_tree_db.js'; import { type MerkleTreeMap } from './merkle_tree_map.js'; import { MerkleTreeOperationsFacade } from './merkle_tree_operations_facade.js'; +import { WorldStateMetrics } from './metrics.js'; /** * The nullifier tree is an indexed tree. @@ -98,9 +101,11 @@ export class MerkleTrees implements MerkleTreeDb { private trees: MerkleTreeMap = null as any; private jobQueue = new SerialQueue(); private initialStateReference: AztecSingleton; + private metrics: WorldStateMetrics; - private constructor(private store: AztecKVStore, private log: DebugLogger) { + private constructor(private store: AztecKVStore, private telemetryClient: TelemetryClient, private log: DebugLogger) { this.initialStateReference = store.openSingleton('merkle_trees_initial_state_reference'); + this.metrics = new WorldStateMetrics(telemetryClient); } /** @@ -108,8 +113,8 @@ export class MerkleTrees implements MerkleTreeDb { * @param store - The db instance to use for data persistance. * @returns - A fully initialized MerkleTrees instance. */ - public static async new(store: AztecKVStore, log = createDebugLogger('aztec:merkle_trees')) { - const merkleTrees = new MerkleTrees(store, log); + public static async new(store: AztecKVStore, client: TelemetryClient, log = createDebugLogger('aztec:merkle_trees')) { + const merkleTrees = new MerkleTrees(store, client, log); await merkleTrees.#init(); return merkleTrees; } @@ -181,12 +186,17 @@ export class MerkleTrees implements MerkleTreeDb { } public async fork(): Promise { - // TODO(palla/prover-node): If the underlying store is being shared with other components, we're unnecessarily - // copying a lot of data unrelated to merkle trees. This may be fine for now, and we may be able to ditch backup-based - // forking in favor of a more elegant proposal. But if we see this operation starts taking a lot of time, we may want - // to open separate stores for merkle trees and other components. - const forked = await this.store.fork(); - return MerkleTrees.new(forked, this.log); + const [ms, db] = await elapsed(async () => { + // TODO(palla/prover-node): If the underlying store is being shared with other components, we're unnecessarily + // copying a lot of data unrelated to merkle trees. This may be fine for now, and we may be able to ditch backup-based + // forking in favor of a more elegant proposal. But if we see this operation starts taking a lot of time, we may want + // to open separate stores for merkle trees and other components. + const forked = await this.store.fork(); + return MerkleTrees.new(forked, this.telemetryClient, this.log); + }); + + this.metrics.recordForkDuration(ms); + return db; } public async delete() { @@ -581,6 +591,8 @@ export class MerkleTrees implements MerkleTreeDb { * @param l1ToL2Messages - The L1 to L2 messages for the block. */ async #handleL2BlockAndMessages(l2Block: L2Block, l1ToL2Messages: Fr[]): Promise { + const timer = new Timer(); + const treeRootWithIdPairs = [ [l2Block.header.state.partial.nullifierTree.root, MerkleTreeId.NULLIFIER_TREE], [l2Block.header.state.partial.noteHashTree.root, MerkleTreeId.NOTE_HASH_TREE], @@ -664,10 +676,13 @@ export class MerkleTrees implements MerkleTreeDb { ); } else { this.log.debug(`Tree ${treeName} synched with size ${info.size} root ${rootStr}`); + this.metrics.recordTreeSize(treeName, info.size); } } await this.#snapshot(l2Block.number); + this.metrics.recordDbSize(this.store.estimateSize().bytes); + this.metrics.recordSyncDuration(ourBlock ? 'commit' : 'rollback_and_update', timer); return { isBlockOurs: ourBlock }; } diff --git a/yarn-project/world-state/src/world-state-db/metrics.ts b/yarn-project/world-state/src/world-state-db/metrics.ts new file mode 100644 index 00000000000..d7965590e1f --- /dev/null +++ b/yarn-project/world-state/src/world-state-db/metrics.ts @@ -0,0 +1,64 @@ +import { type Timer } from '@aztec/foundation/timer'; +import { + Attributes, + type Gauge, + type Histogram, + Metrics, + type TelemetryClient, + ValueType, +} from '@aztec/telemetry-client'; + +export class WorldStateMetrics { + private treeSize: Gauge; + private dbSize: Gauge; + private forkDuration: Histogram; + private syncDuration: Histogram; + + constructor(client: TelemetryClient, name = 'MerkleTreesDb') { + const meter = client.getMeter(name); + this.treeSize = meter.createGauge(Metrics.WORLD_STATE_MERKLE_TREE_SIZE, { + description: 'The size of Merkle trees', + valueType: ValueType.INT, + }); + + this.dbSize = meter.createGauge(Metrics.WORLD_STATE_DB_SIZE, { + description: 'The size of the World State DB', + valueType: ValueType.INT, + unit: 'By', + }); + + this.forkDuration = meter.createHistogram(Metrics.WORLD_STATE_FORK_DURATION, { + description: 'The duration of a fork operation', + unit: 'ms', + valueType: ValueType.INT, + }); + + this.syncDuration = meter.createHistogram(Metrics.WORLD_STATE_SYNC_DURATION, { + description: 'The duration of a sync operation', + unit: 'ms', + valueType: ValueType.INT, + }); + } + + recordTreeSize(treeName: string, treeSize: bigint) { + this.treeSize.record(Number(treeSize), { + [Attributes.MERKLE_TREE_NAME]: treeName, + }); + } + + recordDbSize(dbSizeInBytes: number) { + this.dbSize.record(dbSizeInBytes); + } + + recordForkDuration(timerOrMs: Timer | number) { + const ms = Math.ceil(typeof timerOrMs === 'number' ? timerOrMs : timerOrMs.ms()); + this.forkDuration.record(ms); + } + + recordSyncDuration(syncType: 'commit' | 'rollback_and_update', timerOrMs: Timer | number) { + const ms = Math.ceil(typeof timerOrMs === 'number' ? timerOrMs : timerOrMs.ms()); + this.syncDuration.record(ms, { + [Attributes.STATUS]: syncType, + }); + } +} diff --git a/yarn-project/world-state/tsconfig.json b/yarn-project/world-state/tsconfig.json index 0088e438260..51c22f31bc5 100644 --- a/yarn-project/world-state/tsconfig.json +++ b/yarn-project/world-state/tsconfig.json @@ -21,6 +21,9 @@ { "path": "../merkle-tree" }, + { + "path": "../telemetry-client" + }, { "path": "../types" } diff --git a/yarn-project/yarn.lock b/yarn-project/yarn.lock index 4050f3da274..2f89126d3f8 100644 --- a/yarn-project/yarn.lock +++ b/yarn-project/yarn.lock @@ -1208,6 +1208,7 @@ __metadata: "@aztec/foundation": "workspace:^" "@aztec/kv-store": "workspace:^" "@aztec/merkle-tree": "workspace:^" + "@aztec/telemetry-client": "workspace:^" "@aztec/types": "workspace:^" "@jest/globals": ^29.5.0 "@types/jest": ^29.5.0