From 4d036c3e089058c71ac79be92ec53e8f240b0cef Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Fri, 29 May 2020 12:18:43 +0200 Subject: [PATCH] server: use "read-only" Gossip for tenants We were previously using the Gossip instance of the TestServer against which the tenant was initialized. This commit trims the dependency further by initializing its own Gossip instance which is never written to (i.e. `AddInfo` is never called) and which does not accept incoming connections. As a reminder, the remaining problematic uses of Gossip as of this commit are: - making a `nodeDialer` (for `DistSender`), tracked in: https://github.com/cockroachdb/cockroach/issues/47909 - access to the system config: - `(schemaChangeGCResumer).Resume`, tracked: https://github.com/cockroachdb/cockroach/issues/49691 - `database.Cache`, tracked: https://github.com/cockroachdb/cockroach/issues/49692 - `(physicalplan).spanResolver` (for replica oracle). This is likely not a blocker as we can use a "dumber" oracle in this case; the oracle is used for distsql physical planning of which tenants will do none. Tracked in: https://github.com/cockroachdb/cockroach/issues/48432 Release note: None --- pkg/gossip/gossip.go | 6 +++ pkg/server/testserver.go | 79 +++++++++++++++++++++++++++++++--------- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/pkg/gossip/gossip.go b/pkg/gossip/gossip.go index fa4d46a677a1..ca6423effe58 100644 --- a/pkg/gossip/gossip.go +++ b/pkg/gossip/gossip.go @@ -1659,6 +1659,12 @@ type DeprecatedGossip struct { w errorutil.TenantSQLDeprecatedWrapper } +// Start calls .Start() on the underlying Gossip instance, which is assumed to +// be non-nil. +func (dg DeprecatedGossip) Start(advertAddr net.Addr, resolvers []resolver.Resolver) { + dg.w.Deprecated(0).(*Gossip).Start(advertAddr, resolvers) +} + // deprecated trades a Github issue tracking the removal of the call for the // wrapped Gossip instance. func (dg DeprecatedGossip) deprecated(issueNo int) *Gossip { diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 989a74d5af97..3877a40d8ab3 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -27,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/config" "github.com/cockroachdb/cockroach/pkg/config/zonepb" "github.com/cockroachdb/cockroach/pkg/gossip" + "github.com/cockroachdb/cockroach/pkg/gossip/resolver" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" @@ -437,27 +438,22 @@ func (d dummyProtectedTSProvider) Protect(context.Context, *kv.Txn, *ptpb.Record // this is tracked in https://github.com/cockroachdb/cockroach/issues/47892. const fakeNodeID = roachpb.NodeID(1) -func testSQLServerArgs(ts *TestServer, tenID roachpb.TenantID) sqlServerArgs { - stopper := ts.Stopper() - clusterName := ts.Cfg.ClusterName - // If we used a dummy gossip, DistSQL and random other things won't work. - // Just use the test server's for now. - // - // TODO(tbg): drop the Gossip dependency. - g := ts.Gossip() - ts = nil // prevent usage below +func testSQLServerArgs( + stopper *stop.Stopper, kvClusterName string, tenID roachpb.TenantID, +) sqlServerArgs { st := cluster.MakeTestingClusterSettings() sqlCfg := makeTestSQLConfig(st, tenID) baseCfg := makeTestBaseConfig(st) + baseCfg.AmbientCtx.AddLogTag("sql", nil) // TODO(tbg): this is needed so that the RPC heartbeats between the testcluster // and this tenant work. // // TODO(tbg): address this when we introduce the real tenant RPCs in: // https://github.com/cockroachdb/cockroach/issues/47898 - baseCfg.ClusterName = clusterName + baseCfg.ClusterName = kvClusterName clock := hlc.NewClock(hlc.UnixNano, time.Duration(baseCfg.MaxOffset)) @@ -474,13 +470,41 @@ func testSQLServerArgs(ts *TestServer, tenID roachpb.TenantID) sqlServerArgs { rpcTestingKnobs, ) + // TODO(tbg): expose this registry via prometheus. See: + // https://github.com/cockroachdb/cockroach/issues/47905 + registry := metric.NewRegistry() + var dsKnobs kvcoord.ClientTestingKnobs if dsKnobsP, ok := baseCfg.TestingKnobs.DistSQL.(*kvcoord.ClientTestingKnobs); ok { dsKnobs = *dsKnobsP } rpcRetryOptions := base.DefaultRetryOptions() - resolver := gossip.AddressResolver(g) // TODO(tbg): break gossip dep - nodeDialer := nodedialer.New(rpcContext, resolver) + + // TODO(nvb): this use of Gossip needs to go. Tracked in: + // https://github.com/cockroachdb/cockroach/issues/47909 + var g *gossip.Gossip + { + var nodeID base.NodeIDContainer + nodeID.Set(context.Background(), fakeNodeID) + var clusterID base.ClusterIDContainer + dummyGossipGRPC := rpc.NewServer(rpcContext) // never Serve()s anything + g = gossip.New( + baseCfg.AmbientCtx, + &clusterID, + &nodeID, + rpcContext, + dummyGossipGRPC, + stopper, + registry, + baseCfg.Locality, + &baseCfg.DefaultZoneConfig, + ) + } + + nodeDialer := nodedialer.New( + rpcContext, + gossip.AddressResolver(g), // TODO(nvb): break gossip dep + ) dsCfg := kvcoord.DistSenderConfig{ AmbientCtx: baseCfg.AmbientCtx, Settings: st, @@ -497,9 +521,7 @@ func testSQLServerArgs(ts *TestServer, tenID roachpb.TenantID) sqlServerArgs { if p, ok := baseCfg.TestingKnobs.KVClient.(*kvcoord.ClientTestingKnobs); ok { clientKnobs = *p } - // TODO(tbg): expose this registry via prometheus. See: - // https://github.com/cockroachdb/cockroach/issues/47905 - registry := metric.NewRegistry() + txnMetrics := kvcoord.MakeTxnMetrics(baseCfg.HistogramWindowInterval()) registry.AddMetricStruct(txnMetrics) tcsFactory := kvcoord.NewTxnCoordSenderFactory( @@ -594,13 +616,24 @@ func (ts *TestServer) StartTenant(params base.TestTenantArgs) (pgAddr string, _ return "", err } - args := testSQLServerArgs(ts, params.TenantID) - if params.AllowSettingClusterSettings { + return startTenant(ts.Stopper(), ts.Cfg.ClusterName, ts.RPCAddr(), params.TenantID, params.AllowSettingClusterSettings) +} + +func startTenant( + stopper *stop.Stopper, + kvClusterName string, + tsRPCAddr string, + tenID roachpb.TenantID, + allowSetClusterSetting bool, +) (pgAddr string, _ error) { + args := testSQLServerArgs(stopper, kvClusterName, tenID) + // TODO(tbg): clean this up. + if allowSetClusterSetting { args.TestingKnobs.TenantTestingKnobs = &sql.TenantTestingKnobs{ ClusterSettingsUpdater: args.Settings.MakeUpdater(), } } - ts = nil // proves we're not using it below + ctx := context.Background() s, err := newSQLServer(ctx, args) if err != nil { @@ -639,6 +672,16 @@ func (ts *TestServer) StartTenant(params base.TestTenantArgs) (pgAddr string, _ ) orphanedLeasesTimeThresholdNanos := args.clock.Now().WallTime + { + rsvlr, err := resolver.NewResolver(tsRPCAddr) + if err != nil { + return "", err + } + // NB: gossip server is not bound to any address, so the advertise addr does + // not matter. + args.gossip.Start(pgL.Addr(), []resolver.Resolver{rsvlr}) + } + if err := s.start(ctx, args.stopper, args.TestingKnobs,