diff --git a/pkg/kv/transport.go b/pkg/kv/transport.go index 144eb5131e91..8ea0041233ff 100644 --- a/pkg/kv/transport.go +++ b/pkg/kv/transport.go @@ -129,7 +129,7 @@ func grpcTransportFactoryImpl( }) } - // Put known-unhealthy clients last. + // Put known-healthy clients first. splitHealthy(clients) return &grpcTransport{ diff --git a/pkg/rpc/context.go b/pkg/rpc/context.go index dc9415e9d95f..3e157872c4ce 100644 --- a/pkg/rpc/context.go +++ b/pkg/rpc/context.go @@ -529,38 +529,26 @@ func (ctx *Context) NewBreaker() *circuit.Breaker { return newBreaker(&ctx.breakerClock) } -// ErrNotConnected is returned by ConnHealth when there is no connection to the -// host (e.g. GRPCDial was never called for that address, or a connection has -// been closed and not reconnected). -var ErrNotConnected = errors.New("not connected") - // ErrNotHeartbeated is returned by ConnHealth when we have not yet performed // the first heartbeat. var ErrNotHeartbeated = errors.New("not yet heartbeated") -// ConnHealth returns whether the most recent heartbeat succeeded or not. -// This should not be used as a definite status of a node's health and just used -// to prioritize healthy nodes over unhealthy ones. -// -// NB: as of #22658, this does not work as you think. We kick -// connections out of the connection pool as soon as they run into an -// error, at which point their ConnHealth will reset to -// ErrNotConnected. ConnHealth does no more return a sane notion of -// "recent connection health". When it returns nil all seems well, but -// if it doesn't then this may mean that the node is simply refusing -// connections (and is thus unconnected most of the time), or that the -// node hasn't been connected to but is perfectly healthy. -// -// See #23829. +// ConnHealth returns nil if we have an open connection to the given +// target that succeeded on its most recent heartbeat. Otherwise, it +// kicks off a connection attempt (unless one is already in progress +// or we are in a backoff state) and returns an error (typically +// ErrNotHeartbeated). This is a conservative/pessimistic indicator: +// if we have not attempted to talk to the target node recently, an +// error will be returned. This method should therefore be used to +// prioritize among a list of candidate nodes, but not to filter out +// "unhealthy" nodes. func (ctx *Context) ConnHealth(target string) error { if ctx.GetLocalInternalServerForAddr(target) != nil { // The local server is always considered healthy. return nil } - if value, ok := ctx.conns.Load(target); ok { - return value.(*Connection).heartbeatResult.Load().(heartbeatResult).err - } - return ErrNotConnected + conn := ctx.GRPCDial(target) + return conn.heartbeatResult.Load().(heartbeatResult).err } func (ctx *Context) runHeartbeat( diff --git a/pkg/rpc/context_test.go b/pkg/rpc/context_test.go index 6587fe1ba2d0..2579cc926c4c 100644 --- a/pkg/rpc/context_test.go +++ b/pkg/rpc/context_test.go @@ -254,18 +254,18 @@ func TestHeartbeatHealth(t *testing.T) { return clientCtx.ConnHealth(remoteAddr) }) - if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } - if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } clientCtx.SetLocalInternalServer(&internalServer{}) - if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } if err := clientCtx.ConnHealth(clientCtx.AdvertiseAddr); err != nil { t.Error(err) @@ -378,11 +378,10 @@ func TestHeartbeatHealthTransport(t *testing.T) { isUnhealthy := func(err error) bool { // Most of the time, an unhealthy connection will get - // ErrNotConnected, but there are brief periods during which we - // could get ErrNotHeartbeated (while we're trying to start a new - // connection) or one of the grpc errors below (while the old + // ErrNotHeartbeated, but there are brief periods during which we + // could get one of the grpc errors below (while the old // connection is in the middle of closing). - if err == ErrNotConnected || err == ErrNotHeartbeated { + if err == ErrNotHeartbeated { return true } // The expected code here is Unavailable, but at least on OSX you can also get diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index ffe8ae1385d3..79a3f119a80b 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -578,7 +578,7 @@ func checkNodeHealth( err = connHealth(addr) } - if err != nil && err != rpc.ErrNotConnected && err != rpc.ErrNotHeartbeated { + if err != nil && err != rpc.ErrNotHeartbeated { // This host is known to be unhealthy. Don't use it (use the gateway // instead). Note: this can never happen for our nodeID (which // always has its address in the nodeMap). diff --git a/pkg/sql/drop_database.go b/pkg/sql/drop_database.go index 0234444ad267..c2a8d6c73fbc 100644 --- a/pkg/sql/drop_database.go +++ b/pkg/sql/drop_database.go @@ -172,10 +172,10 @@ func (n *dropDatabaseNode) startExec(params runParams) error { int32(n.dbDesc.ID), int32(params.extendedEvalCtx.NodeID), struct { - DatabaseName string - Statement string - User string - DroppedTablesAndViews []string + DatabaseName string + Statement string + User string + DroppedSchemaObjects []string }{n.n.Name.String(), n.n.String(), p.SessionData().User, tbNameStrings}, ) } diff --git a/pkg/sql/logictest/testdata/logic_test/event_log b/pkg/sql/logictest/testdata/logic_test/event_log index 6aa65b9a8478..22a037445c34 100644 --- a/pkg/sql/logictest/testdata/logic_test/event_log +++ b/pkg/sql/logictest/testdata/logic_test/event_log @@ -292,7 +292,7 @@ DROP DATABASE IF EXISTS othereventlogtest CASCADE # verify event is there, and cascading table drops are logged. query IIT -SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews' +SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects' FROM system.eventlog WHERE "eventType" = 'drop_database' AND info::JSONB->>'Statement' LIKE 'DROP DATABASE eventlogtest%' @@ -300,7 +300,7 @@ WHERE "eventType" = 'drop_database' 53 1 ["eventlogtest.public.anothertesttable", "eventlogtest.public.testtable"] query IIT -SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews' +SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects' FROM system.eventlog WHERE "eventType" = 'drop_database' AND info::JSONB->>'Statement' LIKE 'DROP DATABASE IF EXISTS othereventlogtest%' diff --git a/pkg/ui/src/util/events.spec.ts b/pkg/ui/src/util/events.spec.ts new file mode 100644 index 000000000000..c75a321b122d --- /dev/null +++ b/pkg/ui/src/util/events.spec.ts @@ -0,0 +1,37 @@ +import { assert } from "chai"; +import { EventInfo, getDroppedObjectsText } from "src/util/events"; + +describe("getDroppedObjectsText", function() { + + // The key indicating which objects were dropped in a DROP_DATABASE event has been + // renamed multiple times, creating bugs (e.g. #18523). This test won't fail if the + // key is renamed again on the Go side, but it at least tests that we can handle all + // existing versions. + it("returns a sentence for all versions of the dropped objects key", function() { + const commonProperties: EventInfo = { + User: "root", + DatabaseName: "foo", + }; + const versions: EventInfo[] = [ + { + ...commonProperties, + DroppedTables: ["foo", "bar"], + }, + { + ...commonProperties, + DroppedTablesAndViews: ["foo", "bar"], + }, + { + ...commonProperties, + DroppedSchemaObjects: ["foo", "bar"], + }, + ]; + + const expected = "2 schema objects were dropped: foo, bar"; + + versions.forEach((eventInfoVersion) => { + assert.equal(expected, getDroppedObjectsText(eventInfoVersion)); + }); + }); + +}); diff --git a/pkg/ui/src/util/events.ts b/pkg/ui/src/util/events.ts index c6fae2c5e57c..ea5c071d993a 100644 --- a/pkg/ui/src/util/events.ts +++ b/pkg/ui/src/util/events.ts @@ -9,32 +9,15 @@ type Event$Properties = protos.cockroach.server.serverpb.EventsResponse.Event$Pr * getEventDescription returns a short summary of an event. */ export function getEventDescription(e: Event$Properties): string { - const info: { - DatabaseName: string, - DroppedTables: string[], - IndexName: string, - MutationID: string, - TableName: string, - User: string, - ViewName: string, - SequenceName: string, - SettingName: string, - Value: string, - } = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {}; + const info: EventInfo = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {}; const targetId: number = e.target_id ? e.target_id.toNumber() : null; switch (e.event_type) { case eventTypes.CREATE_DATABASE: return `Database Created: User ${info.User} created database ${info.DatabaseName}`; case eventTypes.DROP_DATABASE: - info.DroppedTables = info.DroppedTables || []; - let tableDropText: string = `${info.DroppedTables.length} tables were dropped: ${info.DroppedTables.join(", ")}`; - if (info.DroppedTables.length === 0) { - tableDropText = "No tables were dropped."; - } else if (info.DroppedTables.length === 1) { - tableDropText = `1 table was dropped: ${info.DroppedTables[0]}`; - } - return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}.${tableDropText}`; + const tableDropText = getDroppedObjectsText(info); + return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}. ${tableDropText}`; case eventTypes.CREATE_TABLE: return `Table Created: User ${info.User} created table ${info.TableName}`; case eventTypes.DROP_TABLE: @@ -80,3 +63,36 @@ export function getEventDescription(e: Event$Properties): string { return `Unknown Event Type: ${e.event_type}, content: ${JSON.stringify(info, null, 2)}`; } } + +// EventInfo corresponds to the `info` column of the `system.eventlog` table +// and the `info` field of the `server.serverpb.EventsResponse.Event` proto. +export interface EventInfo { + User: string; + DatabaseName?: string; + TableName?: string; + IndexName?: string; + MutationID?: string; + ViewName?: string; + SequenceName?: string; + SettingName?: string; + Value?: string; + // The following are three names for the same key (it was renamed twice). + // All ar included for backwards compatibility. + DroppedTables?: string[]; + DroppedTablesAndViews?: string[]; + DroppedSchemaObjects?: string[]; +} + +export function getDroppedObjectsText(eventInfo: EventInfo): string { + const droppedObjects = + eventInfo.DroppedSchemaObjects || eventInfo.DroppedTablesAndViews || eventInfo.DroppedTables; + if (!droppedObjects) { + return ""; + } + if (droppedObjects.length === 0) { + return "No schema objects were dropped."; + } else if (droppedObjects.length === 1) { + return `1 schema object was dropped: ${droppedObjects[0]}`; + } + return `${droppedObjects.length} schema objects were dropped: ${droppedObjects.join(", ")}`; +}