From 07cb344e58fccc418c6f97dd66a5496b162fe22a Mon Sep 17 00:00:00 2001 From: Yevgeniy Miretskiy Date: Thu, 25 Aug 2022 11:05:33 -0400 Subject: [PATCH] kv: Include error information in `crdb_internal.active_range_feeds` Include error count, and the last error information in `crdb_internal.active_range_feeds` table whenever rangefeed disconnects due to an error. Release justification: observability improvement. Release note: None --- pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go | 12 ++++++++++++ pkg/sql/crdb_internal.go | 12 +++++++++++- .../logictest/testdata/logic_test/create_statements | 8 ++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go b/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go index 8ea19348e216..aeab508e8a01 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go @@ -225,6 +225,8 @@ type PartialRangeFeed struct { CreatedTime time.Time LastValueReceived time.Time Resolved hlc.Timestamp + NumErrs int + LastErr error } // ActiveRangeFeedIterFn is an iterator function which is passed PartialRangeFeed structure. @@ -280,6 +282,14 @@ func (a *activeRangeFeed) onRangeEvent( a.RangeID = rangeID } +func (a *activeRangeFeed) setLastError(err error) { + a.Lock() + defer a.Unlock() + a.LastErr = errors.Wrapf(err, "disconnect at %s: checkpoint %s/-%s", + timeutil.Now().Format(time.RFC3339), a.Resolved, timeutil.Since(a.Resolved.GoTime())) + a.NumErrs++ +} + // rangeFeedRegistry is responsible for keeping track of currently executing // range feeds. type rangeFeedRegistry struct { @@ -389,6 +399,8 @@ func (ds *DistSender) partialRangeFeed( startAfter.Forward(maxTS) if err != nil { + active.setLastError(err) + if log.V(1) { log.Infof(ctx, "RangeFeed %s disconnected with last checkpoint %s ago: %v", span, timeutil.Since(startAfter.GoTime()), err) diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index d8a7ee973a36..37b0d410983f 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -5664,7 +5664,9 @@ CREATE TABLE crdb_internal.active_range_feeds ( range_start STRING, range_end STRING, resolved STRING, - last_event_utc INT + last_event_utc INT, + num_errs INT, + last_err STRING );`, populate: func(ctx context.Context, p *planner, _ catalog.DatabaseDescriptor, addRow func(...tree.Datum) error) error { return p.execCfg.DistSender.ForEachActiveRangeFeed( @@ -5675,6 +5677,12 @@ CREATE TABLE crdb_internal.active_range_feeds ( } else { lastEvent = tree.NewDInt(tree.DInt(rf.LastValueReceived.UTC().UnixNano())) } + var lastErr tree.Datum + if rf.LastErr == nil { + lastErr = tree.DNull + } else { + lastErr = tree.NewDString(rf.LastErr.Error()) + } return addRow( tree.NewDInt(tree.DInt(rfCtx.ID)), @@ -5688,6 +5696,8 @@ CREATE TABLE crdb_internal.active_range_feeds ( tree.NewDString(keys.PrettyPrint(nil /* valDirs */, rf.Span.EndKey)), tree.NewDString(rf.Resolved.AsOfSystemTime()), lastEvent, + tree.NewDInt(tree.DInt(rf.NumErrs)), + lastErr, ) }, ) diff --git a/pkg/sql/logictest/testdata/logic_test/create_statements b/pkg/sql/logictest/testdata/logic_test/create_statements index 7c528e201e62..319b5df3040f 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_statements +++ b/pkg/sql/logictest/testdata/logic_test/create_statements @@ -43,7 +43,9 @@ CREATE TABLE crdb_internal.active_range_feeds ( range_start STRING NULL, range_end STRING NULL, resolved STRING NULL, - last_event_utc INT8 NULL + last_event_utc INT8 NULL, + num_errs INT8 NULL, + last_err STRING NULL ) CREATE TABLE crdb_internal.active_range_feeds ( id INT8 NULL, tags STRING NULL, @@ -55,7 +57,9 @@ CREATE TABLE crdb_internal.active_range_feeds ( range_start STRING NULL, range_end STRING NULL, resolved STRING NULL, - last_event_utc INT8 NULL + last_event_utc INT8 NULL, + num_errs INT8 NULL, + last_err STRING NULL ) {} {} CREATE TABLE crdb_internal.backward_dependencies ( descriptor_id INT8 NULL,