Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
98459: server,autoconfig: automatic configuration via config tasks r=adityamaru a=knz

Epic: CRDB-23559
Informs #98431.
All commits but the last are from #98993.

This change introduces "auto config tasks", a mechanism through which
configuration payloads ("tasks") can be injected into a running SQL
service.

This is driven via the "auto config runner" job that was introduced in
the previous commit. The job listens for the arrival of new task
definitions via a `Provider` interface. When new tasks are known and
all previous tasks have completed, the runner creates a job for the
next task.

Release note: None

100476: server/drain: shut down SQL subsystems gracefully before releasing table leases r=JeffSwenson,rytaft a=knz

Needed for #99941 and #99958.
Epic: CRDB-23559

See individual commits for details.

100511: sqlccl: deflake TestGCTenantJobWaitsForProtectedTimestamps r=adityamaru,arulajmani a=knz

Fixes #94808

The tenant server must be shut down before the tenant record is removed; otherwise the tenant's SQL server will self-terminate by calling Stop() on its stopper, which in this case was shared with the parent cluster.

Release note: None

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Apr 3, 2023
4 parents 8e9273c + c79194c + 0a0985e + 58c54d9 commit 04a92b6
Show file tree
Hide file tree
Showing 39 changed files with 1,912 additions and 69 deletions.
1 change: 1 addition & 0 deletions docs/generated/http/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ genrule(
"//pkg/multitenant/mtinfopb:mtinfopb_proto",
"//pkg/multitenant/tenantcapabilities/tenantcapabilitiespb:tenantcapabilitiespb_proto",
"//pkg/roachpb:roachpb_proto",
"//pkg/server/autoconfig/autoconfigpb:autoconfigpb_proto",
"//pkg/server/diagnostics/diagnosticspb:diagnosticspb_proto",
"//pkg/server/serverpb:serverpb_proto",
"//pkg/server/status/statuspb:statuspb_proto",
Expand Down
6 changes: 6 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ ALL_TESTS = [
"//pkg/security/username:username_disallowed_imports_test",
"//pkg/security/username:username_test",
"//pkg/security:security_test",
"//pkg/server/autoconfig:autoconfig_test",
"//pkg/server/debug/goroutineui:goroutineui_test",
"//pkg/server/debug/pprofui:pprofui_test",
"//pkg/server/debug:debug_test",
Expand Down Expand Up @@ -1431,7 +1432,10 @@ GO_TARGETS = [
"//pkg/security/username:username_test",
"//pkg/security:security",
"//pkg/security:security_test",
"//pkg/server/autoconfig/acprovider:acprovider",
"//pkg/server/autoconfig/autoconfigpb:autoconfigpb",
"//pkg/server/autoconfig:autoconfig",
"//pkg/server/autoconfig:autoconfig_test",
"//pkg/server/debug/goroutineui:goroutineui",
"//pkg/server/debug/goroutineui:goroutineui_test",
"//pkg/server/debug/pprofui:pprofui",
Expand Down Expand Up @@ -2814,6 +2818,8 @@ GET_X_DATA_TARGETS = [
"//pkg/security/username:get_x_data",
"//pkg/server:get_x_data",
"//pkg/server/autoconfig:get_x_data",
"//pkg/server/autoconfig/acprovider:get_x_data",
"//pkg/server/autoconfig/autoconfigpb:get_x_data",
"//pkg/server/debug:get_x_data",
"//pkg/server/debug/goroutineui:get_x_data",
"//pkg/server/debug/pprofui:get_x_data",
Expand Down
1 change: 1 addition & 0 deletions pkg/base/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,5 @@ type TestingKnobs struct {
LOQRecovery ModuleTestingKnobs
KeyVisualizer ModuleTestingKnobs
TenantCapabilitiesTestingKnobs ModuleTestingKnobs
AutoConfig ModuleTestingKnobs
}
12 changes: 10 additions & 2 deletions pkg/ccl/testccl/sqlccl/tenant_gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
Expand Down Expand Up @@ -470,7 +471,7 @@ func TestGCTableOrIndexWaitsForProtectedTimestamps(t *testing.T) {
func TestGCTenantJobWaitsForProtectedTimestamps(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
skip.WithIssue(t, 94808)

defer gcjob.SetSmallMaxGCIntervalForTest()()

ctx := context.Background()
Expand Down Expand Up @@ -614,8 +615,11 @@ func TestGCTenantJobWaitsForProtectedTimestamps(t *testing.T) {
tenID := roachpb.MustMakeTenantID(10)
sqlDB.Exec(t, "ALTER RANGE tenants CONFIGURE ZONE USING gc.ttlseconds = 1;")

tenantStopper := stop.NewStopper()
defer tenantStopper.Stop(ctx) // in case the test fails prematurely.

ten, conn10 := serverutils.StartTenant(t, srv,
base.TestTenantArgs{TenantID: tenID, Stopper: srv.Stopper()})
base.TestTenantArgs{TenantID: tenID, Stopper: tenantStopper})
defer conn10.Close()

// Write a cluster PTS record as the tenant.
Expand All @@ -630,6 +634,10 @@ func TestGCTenantJobWaitsForProtectedTimestamps(t *testing.T) {
return tenPtp.WithTxn(txn).Protect(ctx, rec)
}))

// Ensure the secondary tenant is not running any more tasks.
tenantStopper.Stop(ctx)

// Drop the record.
sqlDB.Exec(t, `DROP TENANT [$1]`, tenID.ToUint64())

sqlDB.CheckQueryResultsRetry(
Expand Down
1 change: 1 addition & 0 deletions pkg/gen/protobuf.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ PROTOBUF_SRCS = [
"//pkg/repstream/streampb:streampb_go_proto",
"//pkg/roachpb:roachpb_go_proto",
"//pkg/rpc:rpc_go_proto",
"//pkg/server/autoconfig/autoconfigpb:autoconfigpb_go_proto",
"//pkg/server/diagnostics/diagnosticspb:diagnosticspb_go_proto",
"//pkg/server/serverpb:serverpb_go_proto",
"//pkg/server/status/statuspb:statuspb_go_proto",
Expand Down
4 changes: 4 additions & 0 deletions pkg/jobs/adopt.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ func (r *Registry) addAdoptedJob(
func (r *Registry) runJob(
ctx context.Context, resumer Resumer, job *Job, status Status, taskName string,
) error {
if r.IsDraining() {
return errors.Newf("refusing to start %q; job registry is draining", taskName)
}

job.mu.Lock()
var finalResumeError error
if job.mu.payload.FinalResumeError != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/jobs/jobspb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ proto_library(
"//pkg/kv/kvpb:kvpb_proto",
"//pkg/multitenant/mtinfopb:mtinfopb_proto",
"//pkg/roachpb:roachpb_proto",
"//pkg/server/autoconfig/autoconfigpb:autoconfigpb_proto",
"//pkg/sql/catalog/descpb:descpb_proto",
"//pkg/sql/sessiondatapb:sessiondatapb_proto",
"//pkg/util/hlc:hlc_proto",
Expand All @@ -54,6 +55,7 @@ go_proto_library(
"//pkg/multitenant/mtinfopb",
"//pkg/roachpb",
"//pkg/security/username", # keep
"//pkg/server/autoconfig/autoconfigpb",
"//pkg/sql/catalog/catpb", # keep
"//pkg/sql/catalog/descpb",
"//pkg/sql/sem/tree", # keep
Expand Down
24 changes: 23 additions & 1 deletion pkg/jobs/jobspb/jobs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import "sql/sessiondatapb/session_data.proto";
import "util/hlc/timestamp.proto";
import "clusterversion/cluster_version.proto";
import "google/protobuf/timestamp.proto";
import "server/autoconfig/autoconfigpb/autoconfig.proto";

enum EncryptionMode {
Passphrase = 0;
Expand Down Expand Up @@ -1177,6 +1178,21 @@ message AutoConfigRunnerDetails {
message AutoConfigRunnerProgress {
}

// AutoConfigEnvRunnerDetails is the job details message carried in
// Payload's auto_config_env_runner field, i.e. for jobs of type
// AUTO_CONFIG_ENV_RUNNER.
message AutoConfigEnvRunnerDetails {
// env_id is the environment ID associated with this job. In the
// generated Go code the field is named EnvID and has type
// autoconfigpb.EnvironmentID.
string env_id = 1 [(gogoproto.customname) = "EnvID", (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/server/autoconfig/autoconfigpb.EnvironmentID"];
}

// AutoConfigEnvRunnerProgress is the job progress message carried in
// Progress's auto_config_env_runner field. It currently declares no
// fields.
message AutoConfigEnvRunnerProgress {
}

// AutoConfigTaskDetails is the job details message carried in Payload's
// auto_config_task field, i.e. for jobs of type AUTO_CONFIG_TASK.
message AutoConfigTaskDetails {
// env_id is the environment ID associated with this job. In the
// generated Go code the field is named EnvID and has type
// autoconfigpb.EnvironmentID.
string env_id = 1 [(gogoproto.customname) = "EnvID", (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/server/autoconfig/autoconfigpb.EnvironmentID"];
// task is the autoconfigpb.Task definition attached to this job. It is
// marked non-nullable, so the generated Go field is a value rather than
// a pointer.
cockroach.server.autoconfig.autoconfigpb.Task task = 2 [(gogoproto.nullable) = false];
}

// AutoConfigTaskProgress is the job progress message carried in
// Progress's auto_config_task field. It currently declares no fields.
message AutoConfigTaskProgress {
}

message Payload {
string description = 1;
// If empty, the description is assumed to be the statement.
Expand Down Expand Up @@ -1236,6 +1252,8 @@ message Payload {
PollJobsStatsDetails poll_jobs_stats = 39;

AutoConfigRunnerDetails auto_config_runner = 41;
AutoConfigEnvRunnerDetails auto_config_env_runner = 42;
AutoConfigTaskDetails auto_config_task = 43;
}
reserved 26;
// PauseReason is used to describe the reason that the job is currently paused
Expand Down Expand Up @@ -1263,7 +1281,7 @@ message Payload {
// specifies how old such record could get before this job is canceled.
int64 maximum_pts_age = 40 [(gogoproto.casttype) = "time.Duration", (gogoproto.customname) = "MaximumPTSAge"];

// NEXT ID: 42
// NEXT ID: 44
}

message Progress {
Expand Down Expand Up @@ -1306,6 +1324,8 @@ message Progress {
KeyVisualizerProgress keyVisualizerProgress = 27;
PollJobsStatsProgress pollJobsStats = 28;
AutoConfigRunnerProgress auto_config_runner = 29;
AutoConfigEnvRunnerProgress auto_config_env_runner = 30;
AutoConfigTaskProgress auto_config_task = 31;
}

uint64 trace_id = 21 [(gogoproto.nullable) = false, (gogoproto.customname) = "TraceID", (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.TraceID"];
Expand Down Expand Up @@ -1338,6 +1358,8 @@ enum Type {
KEY_VISUALIZER = 18 [(gogoproto.enumvalue_customname) = "TypeKeyVisualizer"];
POLL_JOBS_STATS = 19 [(gogoproto.enumvalue_customname) = "TypePollJobsStats"];
AUTO_CONFIG_RUNNER = 20 [(gogoproto.enumvalue_customname) = "TypeAutoConfigRunner"];
AUTO_CONFIG_ENV_RUNNER = 21 [(gogoproto.enumvalue_customname) = "TypeAutoConfigEnvRunner"];
AUTO_CONFIG_TASK = 22 [(gogoproto.enumvalue_customname) = "TypeAutoConfigTask"];
}

message Job {
Expand Down
30 changes: 29 additions & 1 deletion pkg/jobs/jobspb/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ var (
_ Details = SchemaTelemetryDetails{}
_ Details = KeyVisualizerDetails{}
_ Details = AutoConfigRunnerDetails{}
_ Details = AutoConfigEnvRunnerDetails{}
_ Details = AutoConfigTaskDetails{}
)

// ProgressDetails is a marker interface for job progress details proto structs.
Expand All @@ -70,6 +72,8 @@ var (
_ ProgressDetails = SchemaTelemetryProgress{}
_ ProgressDetails = KeyVisualizerProgress{}
_ ProgressDetails = AutoConfigRunnerProgress{}
_ ProgressDetails = AutoConfigEnvRunnerProgress{}
_ ProgressDetails = AutoConfigTaskProgress{}
)

// Type returns the payload's job type and panics if the type is invalid.
Expand Down Expand Up @@ -149,6 +153,8 @@ var AutomaticJobTypes = [...]Type{
TypeAutoSchemaTelemetry,
TypePollJobsStats,
TypeAutoConfigRunner,
TypeAutoConfigEnvRunner,
TypeAutoConfigTask,
TypeKeyVisualizer,
}

Expand Down Expand Up @@ -197,6 +203,10 @@ func DetailsType(d isPayload_Details) (Type, error) {
return TypePollJobsStats, nil
case *Payload_AutoConfigRunner:
return TypeAutoConfigRunner, nil
case *Payload_AutoConfigEnvRunner:
return TypeAutoConfigEnvRunner, nil
case *Payload_AutoConfigTask:
return TypeAutoConfigTask, nil
default:
return TypeUnspecified, errors.Newf("Payload.Type called on a payload with an unknown details type: %T", d)
}
Expand Down Expand Up @@ -238,6 +248,8 @@ var JobDetailsForEveryJobType = map[Type]Details{
TypeKeyVisualizer: KeyVisualizerDetails{},
TypePollJobsStats: PollJobsStatsDetails{},
TypeAutoConfigRunner: AutoConfigRunnerDetails{},
TypeAutoConfigEnvRunner: AutoConfigEnvRunnerDetails{},
TypeAutoConfigTask: AutoConfigTaskDetails{},
}

// WrapProgressDetails wraps a ProgressDetails object in the protobuf wrapper
Expand Down Expand Up @@ -287,6 +299,10 @@ func WrapProgressDetails(details ProgressDetails) interface {
return &Progress_PollJobsStats{PollJobsStats: &d}
case AutoConfigRunnerProgress:
return &Progress_AutoConfigRunner{AutoConfigRunner: &d}
case AutoConfigEnvRunnerProgress:
return &Progress_AutoConfigEnvRunner{AutoConfigEnvRunner: &d}
case AutoConfigTaskProgress:
return &Progress_AutoConfigTask{AutoConfigTask: &d}
default:
panic(errors.AssertionFailedf("WrapProgressDetails: unknown progress type %T", d))
}
Expand Down Expand Up @@ -334,6 +350,10 @@ func (p *Payload) UnwrapDetails() Details {
return *d.PollJobsStats
case *Payload_AutoConfigRunner:
return *d.AutoConfigRunner
case *Payload_AutoConfigEnvRunner:
return *d.AutoConfigEnvRunner
case *Payload_AutoConfigTask:
return *d.AutoConfigTask
default:
return nil
}
Expand Down Expand Up @@ -381,6 +401,10 @@ func (p *Progress) UnwrapDetails() ProgressDetails {
return *d.PollJobsStats
case *Progress_AutoConfigRunner:
return *d.AutoConfigRunner
case *Progress_AutoConfigEnvRunner:
return *d.AutoConfigEnvRunner
case *Progress_AutoConfigTask:
return *d.AutoConfigTask
default:
return nil
}
Expand Down Expand Up @@ -452,6 +476,10 @@ func WrapPayloadDetails(details Details) interface {
return &Payload_PollJobsStats{PollJobsStats: &d}
case AutoConfigRunnerDetails:
return &Payload_AutoConfigRunner{AutoConfigRunner: &d}
case AutoConfigEnvRunnerDetails:
return &Payload_AutoConfigEnvRunner{AutoConfigEnvRunner: &d}
case AutoConfigTaskDetails:
return &Payload_AutoConfigTask{AutoConfigTask: &d}
default:
panic(errors.AssertionFailedf("jobs.WrapPayloadDetails: unknown details type %T", d))
}
Expand Down Expand Up @@ -487,7 +515,7 @@ const (
func (Type) SafeValue() {}

// NumJobTypes is the number of job types.
const NumJobTypes = 21
const NumJobTypes = 23

// ChangefeedDetailsMarshaler allows for dependency injection of
// cloud.SanitizeExternalStorageURI to avoid the dependency from this
Expand Down
Loading

0 comments on commit 04a92b6

Please sign in to comment.