Skip to content

Commit

Permalink
Merge #24845 #24852
Browse files Browse the repository at this point in the history
24845: rpc: ConnHealth now kicks off a connection attempt r=tschottdorf a=bdarnell

When #22658 changed ConnHealth to be pessimistic instead of
optimistic, it meant that distsql could theoretically get stuck in a
state without connections to the necessary nodes (distsql would never
initiate connections on its own; it only attempts to use connections
for which ConnHealth returns true so it was effectively relying on
raft/kv to initiate these connections).

Now ConnHealth will attempt to start a connection process if none is
in flight to ensure that we will eventually discover the health of any
address we are concerned about.

Updated the ConnHealth docstring to reflect this change and the change
to pessimistic behavior.

Fixes #23829

Release note: None

24852: ui, sql: fix display of "database dropped" event. r=couchand a=vilterp

The DROP_DATABASE event in `system.eventlog`  was changed from having the key `DroppedTables` to having the key `DroppedTablesAndViews`. The corresponding UI code which displays the dropped tables was not changed, creating bug #18523.

This commit fixes #18523 by renaming the key to `DroppedSchemaObjects`, both in the event and in the UI code. The term "Schema Objects" accounts for the existence of sequences in addition to views and tables.

Q: is it worth adding a migration to change old events from `DroppedTablesAndViews` to `DroppedSchemaObjects`? I'm leaning toward no, since (a) it doesn't look like we added a migration when we renamed `DroppedTables` to `DroppedTablesAndViews`, and (b) without the migration, you'll still see the "drop database" event in the UI, just not the table names (but they'll be in `system.eventlog` under the old key).

Release note (admin ui change): display the names of dropped schema objects on `DROP DATABASE ... CASCADE`

Co-authored-by: Ben Darnell <[email protected]>
Co-authored-by: Pete Vilter <[email protected]>
  • Loading branch information
3 people committed Apr 17, 2018
3 parents c184a3b + 98b99aa + d18eb07 commit b1b9635
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 61 deletions.
2 changes: 1 addition & 1 deletion pkg/kv/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func grpcTransportFactoryImpl(
})
}

// Put known-unhealthy clients last.
// Put known-healthy clients first.
splitHealthy(clients)

return &grpcTransport{
Expand Down
34 changes: 11 additions & 23 deletions pkg/rpc/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,38 +529,26 @@ func (ctx *Context) NewBreaker() *circuit.Breaker {
return newBreaker(&ctx.breakerClock)
}

// ErrNotConnected is returned by ConnHealth when there is no connection to the
// host (e.g. GRPCDial was never called for that address, or a connection has
// been closed and not reconnected).
var ErrNotConnected = errors.New("not connected")

// ErrNotHeartbeated is returned by ConnHealth when we have not yet performed
// the first heartbeat.
var ErrNotHeartbeated = errors.New("not yet heartbeated")

// ConnHealth returns whether the most recent heartbeat succeeded or not.
// This should not be used as a definite status of a node's health and just used
// to prioritize healthy nodes over unhealthy ones.
//
// NB: as of #22658, this does not work as you think. We kick
// connections out of the connection pool as soon as they run into an
// error, at which point their ConnHealth will reset to
// ErrNotConnected. ConnHealth does no more return a sane notion of
// "recent connection health". When it returns nil all seems well, but
// if it doesn't then this may mean that the node is simply refusing
// connections (and is thus unconnected most of the time), or that the
// node hasn't been connected to but is perfectly healthy.
//
// See #23829.
// ConnHealth returns nil if we have an open connection to the given
// target that succeeded on its most recent heartbeat. Otherwise, it
// kicks off a connection attempt (unless one is already in progress
// or we are in a backoff state) and returns an error (typically
// ErrNotHeartbeated). This is a conservative/pessimistic indicator:
// if we have not attempted to talk to the target node recently, an
// error will be returned. This method should therefore be used to
// prioritize among a list of candidate nodes, but not to filter out
// "unhealthy" nodes.
func (ctx *Context) ConnHealth(target string) error {
if ctx.GetLocalInternalServerForAddr(target) != nil {
// The local server is always considered healthy.
return nil
}
if value, ok := ctx.conns.Load(target); ok {
return value.(*Connection).heartbeatResult.Load().(heartbeatResult).err
}
return ErrNotConnected
conn := ctx.GRPCDial(target)
return conn.heartbeatResult.Load().(heartbeatResult).err
}

func (ctx *Context) runHeartbeat(
Expand Down
19 changes: 9 additions & 10 deletions pkg/rpc/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,18 +254,18 @@ func TestHeartbeatHealth(t *testing.T) {
return clientCtx.ConnHealth(remoteAddr)
})

if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotConnected {
t.Errorf("wanted ErrConnected, not %v", err)
if err := clientCtx.ConnHealth("non-existent connection"); err != ErrNotHeartbeated {
t.Errorf("wanted ErrNotHeartbeated, not %v", err)
}

if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected {
t.Errorf("wanted ErrConnected, not %v", err)
if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated {
t.Errorf("wanted ErrNotHeartbeated, not %v", err)
}

clientCtx.SetLocalInternalServer(&internalServer{})

if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotConnected {
t.Errorf("wanted ErrConnected, not %v", err)
if err := clientCtx.ConnHealth(clientCtx.Addr); err != ErrNotHeartbeated {
t.Errorf("wanted ErrNotHeartbeated, not %v", err)
}
if err := clientCtx.ConnHealth(clientCtx.AdvertiseAddr); err != nil {
t.Error(err)
Expand Down Expand Up @@ -378,11 +378,10 @@ func TestHeartbeatHealthTransport(t *testing.T) {

isUnhealthy := func(err error) bool {
// Most of the time, an unhealthy connection will get
// ErrNotConnected, but there are brief periods during which we
// could get ErrNotHeartbeated (while we're trying to start a new
// connection) or one of the grpc errors below (while the old
// ErrNotHeartbeated, but there are brief periods during which we
// could get one of the grpc errors below (while the old
// connection is in the middle of closing).
if err == ErrNotConnected || err == ErrNotHeartbeated {
if err == ErrNotHeartbeated {
return true
}
// The expected code here is Unavailable, but at least on OSX you can also get
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/distsql_physical_planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ func checkNodeHealth(
err = connHealth(addr)
}

if err != nil && err != rpc.ErrNotConnected && err != rpc.ErrNotHeartbeated {
if err != nil && err != rpc.ErrNotHeartbeated {
// This host is known to be unhealthy. Don't use it (use the gateway
// instead). Note: this can never happen for our nodeID (which
// always has its address in the nodeMap).
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/drop_database.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ func (n *dropDatabaseNode) startExec(params runParams) error {
int32(n.dbDesc.ID),
int32(params.extendedEvalCtx.NodeID),
struct {
DatabaseName string
Statement string
User string
DroppedTablesAndViews []string
DatabaseName string
Statement string
User string
DroppedSchemaObjects []string
}{n.n.Name.String(), n.n.String(), p.SessionData().User, tbNameStrings},
)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/event_log
Original file line number Diff line number Diff line change
Expand Up @@ -292,15 +292,15 @@ DROP DATABASE IF EXISTS othereventlogtest CASCADE
# verify event is there, and cascading table drops are logged.

query IIT
SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews'
SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects'
FROM system.eventlog
WHERE "eventType" = 'drop_database'
AND info::JSONB->>'Statement' LIKE 'DROP DATABASE eventlogtest%'
----
53 1 ["eventlogtest.public.anothertesttable", "eventlogtest.public.testtable"]

query IIT
SELECT "targetID", "reportingID", info::JSONB->>'DroppedTablesAndViews'
SELECT "targetID", "reportingID", info::JSONB->>'DroppedSchemaObjects'
FROM system.eventlog
WHERE "eventType" = 'drop_database'
AND info::JSONB->>'Statement' LIKE 'DROP DATABASE IF EXISTS othereventlogtest%'
Expand Down
37 changes: 37 additions & 0 deletions pkg/ui/src/util/events.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { assert } from "chai";
import { EventInfo, getDroppedObjectsText } from "src/util/events";

describe("getDroppedObjectsText", function() {

// The key indicating which objects were dropped in a DROP_DATABASE event has been
// renamed multiple times, creating bugs (e.g. #18523). This test won't fail if the
// key is renamed again on the Go side, but it at least tests that we can handle all
// existing versions.
it("returns a sentence for all versions of the dropped objects key", function() {
const commonProperties: EventInfo = {
User: "root",
DatabaseName: "foo",
};
const versions: EventInfo[] = [
{
...commonProperties,
DroppedTables: ["foo", "bar"],
},
{
...commonProperties,
DroppedTablesAndViews: ["foo", "bar"],
},
{
...commonProperties,
DroppedSchemaObjects: ["foo", "bar"],
},
];

const expected = "2 schema objects were dropped: foo, bar";

versions.forEach((eventInfoVersion) => {
assert.equal(expected, getDroppedObjectsText(eventInfoVersion));
});
});

});
56 changes: 36 additions & 20 deletions pkg/ui/src/util/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,15 @@ type Event$Properties = protos.cockroach.server.serverpb.EventsResponse.Event$Pr
* getEventDescription returns a short summary of an event.
*/
export function getEventDescription(e: Event$Properties): string {
const info: {
DatabaseName: string,
DroppedTables: string[],
IndexName: string,
MutationID: string,
TableName: string,
User: string,
ViewName: string,
SequenceName: string,
SettingName: string,
Value: string,
} = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {};
const info: EventInfo = protobuf.util.isset(e, "info") ? JSON.parse(e.info) : {};
const targetId: number = e.target_id ? e.target_id.toNumber() : null;

switch (e.event_type) {
case eventTypes.CREATE_DATABASE:
return `Database Created: User ${info.User} created database ${info.DatabaseName}`;
case eventTypes.DROP_DATABASE:
info.DroppedTables = info.DroppedTables || [];
let tableDropText: string = `${info.DroppedTables.length} tables were dropped: ${info.DroppedTables.join(", ")}`;
if (info.DroppedTables.length === 0) {
tableDropText = "No tables were dropped.";
} else if (info.DroppedTables.length === 1) {
tableDropText = `1 table was dropped: ${info.DroppedTables[0]}`;
}
return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}.${tableDropText}`;
const tableDropText = getDroppedObjectsText(info);
return `Database Dropped: User ${info.User} dropped database ${info.DatabaseName}. ${tableDropText}`;
case eventTypes.CREATE_TABLE:
return `Table Created: User ${info.User} created table ${info.TableName}`;
case eventTypes.DROP_TABLE:
Expand Down Expand Up @@ -80,3 +63,36 @@ export function getEventDescription(e: Event$Properties): string {
return `Unknown Event Type: ${e.event_type}, content: ${JSON.stringify(info, null, 2)}`;
}
}

// EventInfo corresponds to the `info` column of the `system.eventlog` table
// and the `info` field of the `server.serverpb.EventsResponse.Event` proto.
export interface EventInfo {
User: string;
DatabaseName?: string;
TableName?: string;
IndexName?: string;
MutationID?: string;
ViewName?: string;
SequenceName?: string;
SettingName?: string;
Value?: string;
// The following are three names for the same key (it was renamed twice).
// All ar included for backwards compatibility.
DroppedTables?: string[];
DroppedTablesAndViews?: string[];
DroppedSchemaObjects?: string[];
}

export function getDroppedObjectsText(eventInfo: EventInfo): string {
const droppedObjects =
eventInfo.DroppedSchemaObjects || eventInfo.DroppedTablesAndViews || eventInfo.DroppedTables;
if (!droppedObjects) {
return "";
}
if (droppedObjects.length === 0) {
return "No schema objects were dropped.";
} else if (droppedObjects.length === 1) {
return `1 schema object was dropped: ${droppedObjects[0]}`;
}
return `${droppedObjects.length} schema objects were dropped: ${droppedObjects.join(", ")}`;
}

0 comments on commit b1b9635

Please sign in to comment.