Skip to content

Commit

Permalink
sql: don't distribute migration queries
Browse files Browse the repository at this point in the history
This patch inhibits DistSQL distribution for the queries that the
migrations run. This was prompted by #44101, which is causing a
distributed query done soon after a node startup to sometimes fail.

I've considered more bluntly disabling distribution for any query for a
short period of time after the node starts up, but I went with the more
targeted change to migrations because I think it's a bad idea for
migrations to use query distribution even outside of #44101 -
distributed queries are more fragile than local execution in general
(for example, because of DistSender retries). And migrations can't
tolerate any flakiness.

Fixes #43957
Fixes #44005
Touches #44101

Release note: None
  • Loading branch information
andreimatei committed Jan 18, 2020
1 parent e2b6c96 commit c0af79e
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
1 change: 0 additions & 1 deletion pkg/cmd/roachtest/acceptance.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ func registerAcceptance(r *testRegistry) {
{name: "status-server", fn: runStatusServer},
{
name: "version-upgrade",
skip: "#43957",
fn: runVersionUpgrade,
// This test doesn't like running on old versions because it upgrades to
// the latest released version and then it tries to "head", where head is
Expand Down
15 changes: 14 additions & 1 deletion pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1599,10 +1599,23 @@ func (s *Server) Start(ctx context.Context) error {
if migrationManagerTestingKnobs := s.cfg.TestingKnobs.SQLMigrationManager; migrationManagerTestingKnobs != nil {
mmKnobs = *migrationManagerTestingKnobs.(*sqlmigrations.MigrationManagerTestingKnobs)
}
migrationsExecutor := sql.MakeInternalExecutor(
ctx, s.pgServer.SQLServer, s.internalMemMetrics, s.ClusterSettings())
migrationsExecutor.SetSessionData(
&sessiondata.SessionData{
// Migrations need an executor with query distribution turned off. This is
// because the node crashes if migrations fail to execute, and query
// distribution introduces more moving parts. Local execution is more
// robust; for example, the DistSender has retries if it can't connect to
// another node, but DistSQL doesn't. Also see #44101 for why DistSQL is
// particularly fragile immediately after a node is started (i.e. the
// present situation).
DistSQLMode: sessiondata.DistSQLOff,
})
migMgr := sqlmigrations.NewManager(
s.stopper,
s.db,
s.internalExecutor,
&migrationsExecutor,
s.clock,
mmKnobs,
s.NodeID().String(),
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/event_log
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ AND info NOT LIKE '%sql.stats.automatic_collection.enabled%'
AND info NOT LIKE '%sql.defaults.vectorize%'
ORDER BY "timestamp"
----
0 1 {"SettingName":"diagnostics.reporting.enabled","Value":"true","User":"root"}
0 1 {"SettingName":"diagnostics.reporting.enabled","Value":"true","User":"node"}
0 1 {"SettingName":"kv.range_merge.queue_enabled","Value":"false","User":"root"}
0 1 {"SettingName":"sql.stats.automatic_collection.min_stale_rows","Value":"5","User":"root"}
0 1 {"SettingName":"kv.allocator.load_based_lease_rebalancing.enabled","Value":"false","User":"root"}
Expand Down
15 changes: 10 additions & 5 deletions pkg/sqlmigrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,14 @@ var backwardCompatibleMigrations = []migrationDescriptor{
includedInBootstrap: cluster.VersionByKey(cluster.Version19_2),
workFn: func(ctx context.Context, r runner) error {
// Note that these particular schema changes are idempotent.
if _, err := r.sqlExecutor.ExecWithUser(ctx, "update-reports-meta-generated", nil, /* txn */
security.NodeUser,
if _, err := r.sqlExecutor.ExecEx(ctx, "update-reports-meta-generated", nil, /* txn */
sqlbase.InternalExecutorSessionDataOverride{User: security.NodeUser},
`ALTER TABLE system.reports_meta ALTER generated TYPE TIMESTAMP WITH TIME ZONE`,
); err != nil {
return err
}
if _, err := r.sqlExecutor.ExecWithUser(ctx, "update-reports-meta-generated", nil, /* txn */
security.NodeUser,
if _, err := r.sqlExecutor.ExecEx(ctx, "update-reports-meta-generated", nil, /* txn */
sqlbase.InternalExecutorSessionDataOverride{User: security.NodeUser},
"ALTER TABLE system.replication_constraint_stats ALTER violation_start "+
"TYPE TIMESTAMP WITH TIME ZONE",
); err != nil {
Expand Down Expand Up @@ -769,7 +769,12 @@ func runStmtAsRootWithRetry(
// arbitrarily long time.
var err error
for retry := retry.Start(retry.Options{MaxRetries: 5}); retry.Next(); {
_, err := r.sqlExecutor.Exec(ctx, opName, nil /* txn */, stmt, qargs...)
_, err := r.sqlExecutor.ExecEx(ctx, opName, nil, /* txn */
sqlbase.InternalExecutorSessionDataOverride{
User: security.RootUser,
Database: "system",
},
stmt, qargs...)
if err == nil {
break
}
Expand Down

0 comments on commit c0af79e

Please sign in to comment.