From 99a7c658fbf557a453e5f4eb0a2810229565281c Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 4 Jun 2024 11:13:22 -0700 Subject: [PATCH 1/5] Add batchPoster backlog metric --- arbnode/batch_poster.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 058db160c8..2a0f3a1ec8 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -66,6 +66,8 @@ var ( blobGasLimitGauge = metrics.NewRegisteredGauge("arb/batchposter/blobgas/limit", nil) suggestedTipCapGauge = metrics.NewRegisteredGauge("arb/batchposter/suggestedtipcap", nil) + batchPosterBacklogGauge = metrics.NewRegisteredGauge("arb/batchposter/backlog", nil) + usableBytesInBlob = big.NewInt(int64(len(kzg4844.Blob{}) * 31 / 32)) blobTxBlobGasPerBlob = big.NewInt(params.BlobTxBlobGasPerBlob) ) @@ -1347,6 +1349,7 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) messagesPerBatch = 1 } backlog := uint64(unpostedMessages) / messagesPerBatch + batchPosterBacklogGauge.Update(int64(backlog)) if backlog > 10 { logLevel := log.Warn if recentlyHitL1Bounds { From 390523713d86a6195768dae0b8e83270191bb364 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 4 Jun 2024 11:19:58 -0700 Subject: [PATCH 2/5] Add metrics around DA that can be reused for various DA plugins (celestia, eigenda, anytrust, etc) --- arbnode/batch_poster.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 2a0f3a1ec8..df5c145f0c 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -68,6 +68,10 @@ var ( batchPosterBacklogGauge = metrics.NewRegisteredGauge("arb/batchposter/backlog", nil) + batchPosterDALastSuccessfulActionGauge = metrics.NewRegisteredGauge("arb/batchPoster/action/da_last_success", nil) + batchPosterDASuccessCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/da_success", nil) + batchPosterDAFailureCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/da_failure", nil) + usableBytesInBlob = big.NewInt(int64(len(kzg4844.Blob{}) * 31 / 32)) blobTxBlobGasPerBlob = big.NewInt(params.BlobTxBlobGasPerBlob) ) @@ -1256,15 +1260,21 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) gotNonce, gotMeta, err := b.dataPoster.GetNextNonceAndMeta(ctx) if err != nil { + batchPosterDAFailureCounter.Inc(1) return false, err } if nonce != gotNonce || !bytes.Equal(batchPositionBytes, gotMeta) { + batchPosterDAFailureCounter.Inc(1) return false, fmt.Errorf("%w: nonce changed from %d to %d while creating batch", storage.ErrStorageRace, nonce, gotNonce) } sequencerMsg, err = b.dapWriter.Store(ctx, sequencerMsg, uint64(time.Now().Add(config.DASRetentionPeriod).Unix()), []byte{}, config.DisableDapFallbackStoreDataOnChain) if err != nil { + batchPosterDAFailureCounter.Inc(1) return false, err } + + batchPosterDASuccessCounter.Inc(1) + batchPosterDALastSuccessfulActionGauge.Update(time.Now().Unix()) } data, kzgBlobs, err := b.encodeAddBatch(new(big.Int).SetUint64(batchPosition.NextSeqNum), batchPosition.MessageCount, b.building.msgCount, sequencerMsg, b.building.segments.delayedMsg, b.building.use4844) From 95d6ee7ebf40d69bee97e1c3505cc85ad4d85838 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 4 Jun 2024 17:05:38 -0700 Subject: [PATCH 3/5] Add metric around batch poster failures --- arbnode/batch_poster.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index df5c145f0c..bb4e03bf05 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -72,6 +72,8 @@ var ( batchPosterDASuccessCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/da_success", nil) batchPosterDAFailureCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/da_failure", nil) + batchPosterFailureCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/failure", nil) + usableBytesInBlob = big.NewInt(int64(len(kzg4844.Blob{}) * 31 / 32)) blobTxBlobGasPerBlob = big.NewInt(params.BlobTxBlobGasPerBlob) ) @@ -1043,7 +1045,7 @@ const ethPosBlockTime = 12 * time.Second var errAttemptLockFailed = errors.New("failed to acquire lock; either another batch poster posted a batch or this node fell behind") -func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) { +func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (ret bool, err error) { if b.batchReverted.Load() { return false, fmt.Errorf("batch was reverted, not posting any more batches") } @@ -1243,6 +1245,13 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) // don't post anything for now return false, nil } + + defer func() { + if err != nil { + batchPosterFailureCounter.Inc(1) + } + }() + sequencerMsg, err := b.building.segments.CloseAndGetBytes() if err != nil { return false, err From b2b53eb6644a1f8668a32039f2257a07f80a71bf Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Wed, 5 Jun 2024 08:38:02 -0700 Subject: [PATCH 4/5] Add failure at the top --- arbnode/batch_poster.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index bb4e03bf05..6af1c5471c 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -1046,6 +1046,12 @@ const ethPosBlockTime = 12 * time.Second var errAttemptLockFailed = errors.New("failed to acquire lock; either another batch poster posted a batch or this node fell behind") func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (ret bool, err error) { + defer func() { + if err != nil { + batchPosterFailureCounter.Inc(1) + } + }() + if b.batchReverted.Load() { return false, fmt.Errorf("batch was reverted, not posting any more batches") } @@ -1246,12 +1252,6 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (ret bool, er return false, nil } - defer func() { - if err != nil { - batchPosterFailureCounter.Inc(1) - } - }() - sequencerMsg, err := b.building.segments.CloseAndGetBytes() if err != nil { return false, err From 89b5611665301fd60f76c4daf1ea6f77aa8c7f2d Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Fri, 21 Jun 2024 09:00:08 -0700 Subject: [PATCH 5/5] Address comments --- arbnode/batch_poster.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 6af1c5471c..478bbd395e 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -66,7 +66,7 @@ var ( blobGasLimitGauge = metrics.NewRegisteredGauge("arb/batchposter/blobgas/limit", nil) suggestedTipCapGauge = metrics.NewRegisteredGauge("arb/batchposter/suggestedtipcap", nil) - batchPosterBacklogGauge = metrics.NewRegisteredGauge("arb/batchposter/backlog", nil) + batchPosterEstimatedBatchBacklogGauge = metrics.NewRegisteredGauge("arb/batchposter/estimated_batch_backlog", nil) batchPosterDALastSuccessfulActionGauge = metrics.NewRegisteredGauge("arb/batchPoster/action/da_last_success", nil) batchPosterDASuccessCounter = metrics.NewRegisteredCounter("arb/batchPoster/action/da_success", nil) @@ -1045,13 +1045,7 @@ const ethPosBlockTime = 12 * time.Second var errAttemptLockFailed = errors.New("failed to acquire lock; either another batch poster posted a batch or this node fell behind") -func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (ret bool, err error) { - defer func() { - if err != nil { - batchPosterFailureCounter.Inc(1) - } - }() - +func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) { if b.batchReverted.Load() { return false, fmt.Errorf("batch was reverted, not posting any more batches") } @@ -1368,7 +1362,7 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (ret bool, er messagesPerBatch = 1 } backlog := uint64(unpostedMessages) / messagesPerBatch - batchPosterBacklogGauge.Update(int64(backlog)) + batchPosterEstimatedBatchBacklogGauge.Update(int64(backlog)) if backlog > 10 { logLevel := log.Warn if recentlyHitL1Bounds { @@ -1481,6 +1475,7 @@ func (b *BatchPoster) Start(ctxIn context.Context) { logLevel = normalGasEstimationFailedEphemeralErrorHandler.LogLevel(err, logLevel) logLevel = accumulatorNotFoundEphemeralErrorHandler.LogLevel(err, logLevel) logLevel("error posting batch", "err", err) + batchPosterFailureCounter.Inc(1) return b.config().ErrorDelay } else if posted { return 0