From 98b99aa5c8b2f592a84b0fd7d1bc197f44910aa2 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 16 Apr 2018 17:09:57 -0400 Subject: [PATCH 1/2] rpc: ConnHealth now kicks off a connection attempt When #22658 changed ConnHealth to be pessimistic instead of optimistic, it meant that distsql could theoretically get stuck in a state without connections to the necessary nodes (distsql would never initiate connections on its own; it only attempts to use connections for which ConnHealth returns true so it was effectively relying on raft/kv to initiate these connections). Now ConnHealth will attempt to start a connection process if none is in flight to ensure that we will eventually discover the health of any address we are concerned about. Updated the ConnHealth docstring to reflect this change and the change to pessimistic behavior. Fixes #23829 Release note: None --- pkg/kv/transport.go | 2 +- pkg/rpc/context.go | 34 ++++++++++------------------- pkg/rpc/context_test.go | 19 ++++++++-------- pkg/sql/distsql_physical_planner.go | 2 +- 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/pkg/kv/transport.go b/pkg/kv/transport.go index 144eb5131e91..8ea0041233ff 100644 --- a/pkg/kv/transport.go +++ b/pkg/kv/transport.go @@ -129,7 +129,7 @@ func grpcTransportFactoryImpl( }) } - // Put known-unhealthy clients last. + // Put known-healthy clients first. splitHealthy(clients) return &grpcTransport{ diff --git a/pkg/rpc/context.go b/pkg/rpc/context.go index dc9415e9d95f..3e157872c4ce 100644 --- a/pkg/rpc/context.go +++ b/pkg/rpc/context.go @@ -529,38 +529,26 @@ func (ctx *Context) NewBreaker() *circuit.Breaker { return newBreaker(&ctx.breakerClock) } -// ErrNotConnected is returned by ConnHealth when there is no connection to the -// host (e.g. GRPCDial was never called for that address, or a connection has -// been closed and not reconnected). -var ErrNotConnected = errors.New("not connected") - // ErrNotHeartbeated is returned by ConnHealth when we have not yet performed // the first heartbeat. var ErrNotHeartbeated = errors.New("not yet heartbeated") -// ConnHealth returns whether the most recent heartbeat succeeded or not. -// This should not be used as a definite status of a node's health and just used -// to prioritize healthy nodes over unhealthy ones. -// -// NB: as of #22658, this does not work as you think. We kick -// connections out of the connection pool as soon as they run into an -// error, at which point their ConnHealth will reset to -// ErrNotConnected. ConnHealth does no more return a sane notion of -// "recent connection health". When it returns nil all seems well, but -// if it doesn't then this may mean that the node is simply refusing -// connections (and is thus unconnected most of the time), or that the -// node hasn't been connected to but is perfectly healthy. -// -// See #23829. +// ConnHealth returns nil if we have an open connection to the given +// target that succeeded on its most recent heartbeat. Otherwise, it +// kicks off a connection attempt (unless one is already in progress +// or we are in a backoff state) and returns an error (typically +// ErrNotHeartbeated). This is a conservative/pessimistic indicator: +// if we have not attempted to talk to the target node recently, an +// error will be returned. This method should therefore be used to +// prioritize among a list of candidate nodes, but not to filter out +// "unhealthy" nodes. func (ctx *Context) ConnHealth(target string) error { if ctx.GetLocalInternalServerForAddr(target) != nil { // The local server is always considered healthy. return nil } - if value, ok := ctx.conns.Load(target); ok { - return value.(*Connection).heartbeatResult.Load().(heartbeatResult).err - } - return ErrNotConnected + conn := ctx.GRPCDial(target) + return conn.heartbeatResult.Load().(heartbeatResult).err } func (ctx *Context) runHeartbeat( diff --git a/pkg/rpc/context_test.go b/pkg/rpc/context_test.go index 6587fe1ba2d0..2579cc926c4c 100644 --- a/pkg/rpc/context_test.go +++ b/pkg/rpc/context_test.go @@ -254,18 +254,18 @@ func TestHeartbeatHealth(t *testing.T) { return clientCtx.ConnHealth(remoteAddr) }) - if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } - if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } clientCtx.SetLocalInternalServer(&internalServer{}) - if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected { - t.Errorf("wanted ErrConnected, not %v", err) + if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated { + t.Errorf("wanted ErrNotHeartbeated, not %v", err) } if err := clientCtx.ConnHealth(clientCtx.AdvertiseAddr); err != nil { t.Error(err) @@ -378,11 +378,10 @@ func TestHeartbeatHealthTransport(t *testing.T) { isUnhealthy := func(err error) bool { // Most of the time, an unhealthy connection will get - // ErrNotConnected, but there are brief periods during which we - // could get ErrNotHeartbeated (while we're trying to start a new - // connection) or one of the grpc errors below (while the old + // ErrNotHeartbeated, but there are brief periods during which we + // could get one of the grpc errors below (while the old // connection is in the middle of closing). - if err == ErrNotConnected || err == ErrNotHeartbeated { + if err == ErrNotHeartbeated { return true } // The expected code here is Unavailable, but at least on OSX you can also get diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index ffe8ae1385d3..79a3f119a80b 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -578,7 +578,7 @@ func checkNodeHealth( err = connHealth(addr) } - if err != nil && err != rpc.ErrNotConnected && err != rpc.ErrNotHeartbeated { + if err != nil && err != rpc.ErrNotHeartbeated { // This host is known to be unhealthy. Don't use it (use the gateway // instead). Note: this can never happen for our nodeID (which // always has its address in the nodeMap). From d18eb07a6a409fa1b6941545d6edbaf48cf8da4f Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Mon, 16 Apr 2018 18:19:05 -0400 Subject: [PATCH 2/2] ui, sql: fix display of "database dropped" event. The DROP_DATABASE event in `system.eventlog` was changed from having the key `DroppedTables` to having the key `DroppedTablesAndViews`. The corresponding UI code which displays the dropped tables was not changed, creating bug #18523. This commit fixes #18523 by renaming the key to `DroppedSchemaObjects`, both in the event and in the UI code. The term "Schema Objects" accounts for the existence of sequences in addition to views and tables. Release note (admin ui change): display the names of dropped schema objects on `DROP DATABASE ... CASCADE` --- pkg/sql/drop_database.go | 8 +-- .../logictest/testdata/logic_test/event_log | 4 +- pkg/ui/src/util/events.spec.ts | 37 ++++++++++++ pkg/ui/src/util/events.ts | 56 ++++++++++++------- 4 files changed, 79 insertions(+), 26 deletions(-) create mode 100644 pkg/ui/src/util/events.spec.ts diff --git a/pkg/sql/drop_database.go b/pkg/sql/drop_database.go index 0234444ad267..c2a8d6c73fbc 100644 --- a/pkg/sql/drop_database.go +++ b/pkg/sql/drop_database.go @@ -172,10 +172,10 @@ func (n *dropDatabaseNode) startExec(params runParams) error { int32(n.dbDesc.ID), int32(params.extendedEvalCtx.NodeID), struct { - DatabaseName string - Statement string - User string - DroppedTablesAndViews []string + DatabaseName string + Statement string + User string + DroppedSchemaObjects []string }{n.n.Name.String(), n.n.String(), p.SessionData().User, tbNameStrings}, ) } diff --git a/pkg/sql/logictest/testdata/logic_test/event_log b/pkg/sql/logictest/testdata/logic_test/event_log index 6aa65b9a8478..22a037445c34 100644 --- a/pkg/sql/logictest/testdata/logic_test/event_log +++ b/pkg/sql/logictest/testdata/logic_test/event_log @@ -292,7 +292,7 @@ DROP DATABASE IF EXISTS othereventlogtest CASCADE # verify event is there, and cascading table drops are logged. query IIT -SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews' +SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects' FROM system.eventlog WHERE "eventType" = 'drop_database' AND info::JSONB->>'Statement' LIKE 'DROP DATABASE eventlogtest%' @@ -300,7 +300,7 @@ WHERE "eventType" = 'drop_database' 53 1 ["eventlogtest.public.anothertesttable", "eventlogtest.public.testtable"] query IIT -SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews' +SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects' FROM system.eventlog WHERE "eventType" = 'drop_database' AND info::JSONB->>'Statement' LIKE 'DROP DATABASE IF EXISTS othereventlogtest%' diff --git a/pkg/ui/src/util/events.spec.ts b/pkg/ui/src/util/events.spec.ts new file mode 100644 index 000000000000..c75a321b122d --- /dev/null +++ b/pkg/ui/src/util/events.spec.ts @@ -0,0 +1,37 @@ +import { assert } from "chai"; +import { EventInfo, getDroppedObjectsText } from "src/util/events"; + +describe("getDroppedObjectsText", function() { + + // The key indicating which objects were dropped in a DROP_DATABASE event has been + // renamed multiple times, creating bugs (e.g. #18523). This test won't fail if the + // key is renamed again on the Go side, but it at least tests that we can handle all + // existing versions. + it("returns a sentence for all versions of the dropped objects key", function() { + const commonProperties: EventInfo = { + User: "root", + DatabaseName: "foo", + }; + const versions: EventInfo[] = [ + { + ...commonProperties, + DroppedTables: ["foo", "bar"], + }, + { + ...commonProperties, + DroppedTablesAndViews: ["foo", "bar"], + }, + { + ...commonProperties, + DroppedSchemaObjects: ["foo", "bar"], + }, + ]; + + const expected = "2 schema objects were dropped: foo, bar"; + + versions.forEach((eventInfoVersion) => { + assert.equal(expected, getDroppedObjectsText(eventInfoVersion)); + }); + }); + +}); diff --git a/pkg/ui/src/util/events.ts b/pkg/ui/src/util/events.ts index c6fae2c5e57c..ea5c071d993a 100644 --- a/pkg/ui/src/util/events.ts +++ b/pkg/ui/src/util/events.ts @@ -9,32 +9,15 @@ type Event$Properties = protos.cockroach.server.serverpb.EventsResponse.Event$Pr * getEventDescription returns a short summary of an event. */ export function getEventDescription(e: Event$Properties): string { - const info: { - DatabaseName: string, - DroppedTables: string[], - IndexName: string, - MutationID: string, - TableName: string, - User: string, - ViewName: string, - SequenceName: string, - SettingName: string, - Value: string, - } = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {}; + const info: EventInfo = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {}; const targetId: number = e.target_id ? e.target_id.toNumber() : null; switch (e.event_type) { case eventTypes.CREATE_DATABASE: return `Database Created: User ${info.User} created database ${info.DatabaseName}`; case eventTypes.DROP_DATABASE: - info.DroppedTables = info.DroppedTables || []; - let tableDropText: string = `${info.DroppedTables.length} tables were dropped: ${info.DroppedTables.join(", ")}`; - if (info.DroppedTables.length === 0) { - tableDropText = "No tables were dropped."; - } else if (info.DroppedTables.length === 1) { - tableDropText = `1 table was dropped: ${info.DroppedTables[0]}`; - } - return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}.${tableDropText}`; + const tableDropText = getDroppedObjectsText(info); + return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}. ${tableDropText}`; case eventTypes.CREATE_TABLE: return `Table Created: User ${info.User} created table ${info.TableName}`; case eventTypes.DROP_TABLE: @@ -80,3 +63,36 @@ export function getEventDescription(e: Event$Properties): string { return `Unknown Event Type: ${e.event_type}, content: ${JSON.stringify(info, null, 2)}`; } } + +// EventInfo corresponds to the `info` column of the `system.eventlog` table +// and the `info` field of the `server.serverpb.EventsResponse.Event` proto. +export interface EventInfo { + User: string; + DatabaseName?: string; + TableName?: string; + IndexName?: string; + MutationID?: string; + ViewName?: string; + SequenceName?: string; + SettingName?: string; + Value?: string; + // The following are three names for the same key (it was renamed twice). + // All ar included for backwards compatibility. + DroppedTables?: string[]; + DroppedTablesAndViews?: string[]; + DroppedSchemaObjects?: string[]; +} + +export function getDroppedObjectsText(eventInfo: EventInfo): string { + const droppedObjects = + eventInfo.DroppedSchemaObjects || eventInfo.DroppedTablesAndViews || eventInfo.DroppedTables; + if (!droppedObjects) { + return ""; + } + if (droppedObjects.length === 0) { + return "No schema objects were dropped."; + } else if (droppedObjects.length === 1) { + return `1 schema object was dropped: ${droppedObjects[0]}`; + } + return `${droppedObjects.length} schema objects were dropped: ${droppedObjects.join(", ")}`; +}