Skip to content

Commit

Permalink
Merge #95658
Browse files Browse the repository at this point in the history
95658: server: only run tenants with service mode 'shared' r=stevendanna a=knz

Fixes #92739.

Previous PRs:
- [x]  sql: improve tenant records #95691 

Prior to this patch, services for secondary tenants would be started
automatically upon first use by a client.

This commit changes this to auto-start services upfront for all
tenants with service mode SHARED. (And shut down services for tenants
with another service mode configured.)

Release note: None
Epic: CRDB-21836


Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Jan 28, 2023
2 parents 502acb0 + 3ff9bc9 commit 69dd453
Show file tree
Hide file tree
Showing 20 changed files with 1,723 additions and 937 deletions.
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@
/pkg/server/pgurl/ @cockroachdb/sql-sessions @cockroachdb/cli-prs
/pkg/server/server_http*.go @cockroachdb/obs-inf-prs @cockroachdb/server-prs
/pkg/server/server_import_ts*.go @cockroachdb/obs-inf-prs @cockroachdb/kv-prs
/pkg/server/server_controller_http.go @cockroachdb/obs-inf-prs @cockroachdb/server-prs
/pkg/server/server_controller_sql.go @cockroachdb/sql-sessions @cockroachdb/server-prs
/pkg/server/serverpb/ @cockroachdb/obs-inf-prs @cockroachdb/server-prs
/pkg/server/serverpb/authentication* @cockroachdb/obs-inf-prs @cockroachdb/prodsec @cockroachdb/server-prs
/pkg/server/serverpb/index_reco* @cockroachdb/sql-observability @cockroachdb/obs-inf-prs
Expand Down
42 changes: 42 additions & 0 deletions docs/generated/eventlog.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,48 @@ after being offline.
| `StartedAt` | The time when this node was last started. | no |
| `LastUp` | The approximate last time the node was up before the last restart. | no |

### `tenant_shared_service_start`

An event of type `tenant_shared_service_start` is recorded when a tenant server
is started inside the same process as the KV layer.


| Field | Description | Sensitive |
|--|--|--|
| `OK` | Whether the startup was successful. | no |
| `ErrorText` | If the startup failed, the text of the error. | partially |


#### Common fields

| Field | Description | Sensitive |
|--|--|--|
| `Timestamp` | The timestamp of the event. Expressed as nanoseconds since the Unix epoch. | no |
| `EventType` | The type of the event. | no |
| `NodeID` | The node ID where the event was originated. | no |
| `TenantID` | The ID of the tenant owning the service. | no |
| `InstanceID` | The ID of the server instance. | no |
| `TenantName` | The name of the tenant at the time the event was emitted. | yes |

### `tenant_shared_service_stop`

An event of type `tenant_shared_service_stop` is recorded when a tenant server
is shut down inside the same process as the KV layer.




#### Common fields

| Field | Description | Sensitive |
|--|--|--|
| `Timestamp` | The timestamp of the event. Expressed as nanoseconds since the Unix epoch. | no |
| `EventType` | The type of the event. | no |
| `NodeID` | The node ID where the event was originated. | no |
| `TenantID` | The ID of the tenant owning the service. | no |
| `InstanceID` | The ID of the server instance. | no |
| `TenantName` | The name of the tenant at the time the event was emitted. | yes |

## Debugging events

Events in this category pertain to debugging operations performed by
Expand Down
15 changes: 9 additions & 6 deletions pkg/bench/foreachdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ func benchmarkCockroach(b *testing.B, f BenchmarkFn) {
f(b, sqlutils.MakeSQLRunner(db))
}

// benchmarkInProcessTenantCockroach runs the benchmark against an in-memory
// tenant in a single-node cluster. The tenant runs in the same process as the
// KV host.
func benchmarkInProcessTenantCockroach(b *testing.B, f BenchmarkFn) {
// benchmarkSharedProcessTenantCockroach runs the benchmark against a
// shared process tenant server in a single-node cluster. The tenant
// runs in the same process as the KV host.
func benchmarkSharedProcessTenantCockroach(b *testing.B, f BenchmarkFn) {
ctx := context.Background()
s, db, _ := serverutils.StartServer(
b, base.TestServerArgs{
Expand All @@ -65,7 +65,10 @@ func benchmarkInProcessTenantCockroach(b *testing.B, f BenchmarkFn) {

// Create our own test tenant with a known name.
tenantName := "benchtenant"
_, err := db.Exec("SELECT crdb_internal.create_tenant(10, $1)", tenantName)
_, _, err := s.(*server.TestServer).StartSharedProcessTenant(ctx,
base.TestSharedProcessTenantArgs{
TenantName: roachpb.TenantName(tenantName),
})
require.NoError(b, err)

// Get a SQL connection to the test tenant.
Expand Down Expand Up @@ -203,7 +206,7 @@ func benchmarkMySQL(b *testing.B, f BenchmarkFn) {
func ForEachDB(b *testing.B, fn BenchmarkFn) {
for _, dbFn := range []func(*testing.B, BenchmarkFn){
benchmarkCockroach,
benchmarkInProcessTenantCockroach,
benchmarkSharedProcessTenantCockroach,
benchmarkSepProcessTenantCockroach,
benchmarkMultinodeCockroach,
benchmarkPostgres,
Expand Down
13 changes: 9 additions & 4 deletions pkg/ccl/serverccl/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
Expand Down Expand Up @@ -162,13 +163,17 @@ func TestListTenants(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
ctx := context.Background()
s, _, _ := serverutils.StartServer(t, base.TestServerArgs{
DisableDefaultTestTenant: true,
})
defer s.Stopper().Stop(context.Background())
defer s.Stopper().Stop(ctx)

db := sqlutils.MakeSQLRunner(sqlDB)
db.Exec(t, "CREATE TENANT test;")
_, _, err := s.(*server.TestServer).StartSharedProcessTenant(ctx,
base.TestSharedProcessTenantArgs{
TenantName: "test",
})
require.NoError(t, err)

const path = "tenants"
var response serverpb.ListTenantsResponse
Expand Down
68 changes: 66 additions & 2 deletions pkg/ccl/serverccl/server_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/sql/lexbase"
"github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -57,12 +58,21 @@ func TestServerControllerHTTP(t *testing.T) {
require.NoError(t, row.Scan(&id, &secret, &username, &created, &expires))

// Create our own test tenant with a known name.
_, err = db.Exec("SELECT crdb_internal.create_tenant(10, 'hello')")
_, _, err = s.(*server.TestServer).StartSharedProcessTenant(ctx,
base.TestSharedProcessTenantArgs{
TenantName: "hello",
})
require.NoError(t, err)

// Get a SQL connection to the test tenant.
sqlAddr := s.ServingSQLAddr()
db2 := serverutils.OpenDBConn(t, sqlAddr, "cluster:hello/defaultdb", false, s.Stopper())
db2, err := serverutils.OpenDBConnE(sqlAddr, "cluster:hello/defaultdb", false, s.Stopper())
// Expect no error yet: the connection is opened lazily; an
// error here means the parameters were incorrect.
require.NoError(t, err)

// This actually uses the connection.
require.NoError(t, db2.Ping())

// Instantiate the HTTP test username and privileges into the test tenant.
_, err = db2.Exec(fmt.Sprintf(`CREATE USER %s`, lexbase.EscapeSQLIdent(username)))
Expand Down Expand Up @@ -170,3 +180,57 @@ VALUES($1, $2, $3, $4, $5)`, id, secret, username, created, expires)
require.Equal(t, len(body.Sessions), 1)
require.Equal(t, body.Sessions[0].ApplicationName, "hello system")
}

func TestServerStartStop(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()

s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
DisableDefaultTestTenant: true,
})
defer s.Stopper().Stop(ctx)

sqlAddr := s.ServingSQLAddr()

// Create our own test tenant with a known name.
_, err := db.Exec("CREATE TENANT hello")
require.NoError(t, err)

// Make the service alive.
_, err = db.Exec("ALTER TENANT hello START SERVICE SHARED")
require.NoError(t, err)

// Check the liveness.
testutils.SucceedsSoon(t, func() error {
db2, err := serverutils.OpenDBConnE(sqlAddr, "cluster:hello/defaultdb", false, s.Stopper())
// Expect no error yet: the connection is opened lazily; an
// error here means the parameters were incorrect.
require.NoError(t, err)

defer db2.Close()
if err := db2.Ping(); err != nil {
return err
}
return nil
})

// Stop the service. .
_, err = db.Exec("ALTER TENANT hello STOP SERVICE")
require.NoError(t, err)

// Verify that the service is indeed stopped.
testutils.SucceedsSoon(t, func() error {
db2, err := serverutils.OpenDBConnE(sqlAddr, "cluster:hello/defaultdb", false, s.Stopper())
// Expect no error yet: the connection is opened lazily; an
// error here means the parameters were incorrect.
require.NoError(t, err)
defer db2.Close()
if err := db2.Ping(); err != nil {
// Connection error: success.
return nil //nolint:returnerrcheck
}
return errors.New("server still alive")
})
}
6 changes: 6 additions & 0 deletions pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ go_library(
"rlimit_unix.go",
"server.go",
"server_controller.go",
"server_controller_accessors.go",
"server_controller_http.go",
"server_controller_new_server.go",
"server_controller_orchestration.go",
"server_controller_sql.go",
"server_http.go",
"server_sql.go",
"server_systemlog_gc.go",
Expand Down Expand Up @@ -129,6 +134,7 @@ go_library(
"//pkg/kv/kvserver/rangelog",
"//pkg/kv/kvserver/reports",
"//pkg/multitenant",
"//pkg/multitenant/mtinfopb",
"//pkg/multitenant/multitenantcpu",
"//pkg/multitenant/multitenantio",
"//pkg/multitenant/tenantcostmodel",
Expand Down
30 changes: 12 additions & 18 deletions pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -4304,33 +4304,27 @@ func (s *adminServer) RecoveryVerify(
return nil, errors.AssertionFailedf("To be implemented by #93043")
}

// ListTenants returns a list of active tenants in the cluster. Calling this
// function will start in-process tenants if they are not already running.
// ListTenants returns a list of tenants that are served
// by shared-process services in this server.
func (s *systemAdminServer) ListTenants(
ctx context.Context, _ *serverpb.ListTenantsRequest,
) (*serverpb.ListTenantsResponse, error) {
ie := s.internalExecutor
rowIter, err := ie.QueryIterator(ctx, "list-tenants", nil, /* txn */
`SELECT name FROM system.tenants WHERE active = true AND name IS NOT NULL`)
tenantNames, err := s.server.serverController.getExpectedRunningTenants(ctx, s.internalExecutor)
if err != nil {
return nil, err
}
defer func() { _ = rowIter.Close() }()

var tenantNames []roachpb.TenantName
var hasNext bool
for hasNext, err = rowIter.Next(ctx); hasNext && err == nil; hasNext, err = rowIter.Next(ctx) {
row := rowIter.Cur()
tenantName := tree.MustBeDString(row[0])
tenantNames = append(tenantNames, roachpb.TenantName(tenantName))
}

var tenantList []*serverpb.Tenant
tenantList := make([]*serverpb.Tenant, 0, len(tenantNames))
for _, tenantName := range tenantNames {
server, err := s.server.serverController.getOrCreateServer(ctx, tenantName)
server, err := s.server.serverController.getServer(ctx, tenantName)
if err != nil {
log.Errorf(ctx, "unable to get or create a tenant server: %v", err)
continue
if errors.Is(err, errNoTenantServerRunning) {
// The service for this tenant is not started yet. This is not
// an error - the services are started asynchronously. The
// client can try again later.
continue
}
return nil, err
}
tenantID := server.getTenantID()
tenantList = append(tenantList, &serverpb.Tenant{
Expand Down
4 changes: 4 additions & 0 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,10 @@ func (n *Node) recordJoinEvent(ctx context.Context) {
nodeDetails.StartedAt = n.startedAt
nodeDetails.NodeID = int32(n.Descriptor.NodeID)

n.logStructuredEvent(ctx, event)
}

func (n *Node) logStructuredEvent(ctx context.Context, event logpb.EventPayload) {
// Ensure that the event goes to log files even if LogRangeAndNodeEvents is
// disabled (which means skip the system.eventlog _table_).
log.StructuredEvent(ctx, event)
Expand Down
13 changes: 11 additions & 2 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,9 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
sStatus.baseStatusServer.sqlServer = sqlServer

// Create a server controller.
sc := newServerController(ctx, stopper, st,
sc := newServerController(ctx,
node, cfg.BaseConfig.IDContainer,
stopper, st,
lateBoundServer, &systemServerWrapper{server: lateBoundServer}, systemTenantNameContainer)

// Create the debug API server.
Expand All @@ -1033,7 +1035,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
// https://github.com/cockroachdb/cockroach/issues/84585 is
// implemented.
func(ctx context.Context, name roachpb.TenantName) error {
d, err := sc.getOrCreateServer(ctx, name)
d, err := sc.getServer(ctx, name)
if err != nil {
return err
}
Expand Down Expand Up @@ -1897,6 +1899,13 @@ func (s *Server) PreStart(ctx context.Context) error {
s.stopper,
)

// Let the server controller start watching tenant service mode changes.
if err := s.serverController.start(workersCtx,
s.node.execCfg.InternalDB.Executor(),
); err != nil {
return errors.Wrap(err, "failed to start the server controller")
}

log.Event(ctx, "server initialized")

// Begin recording time series data collected by the status monitor.
Expand Down
Loading

0 comments on commit 69dd453

Please sign in to comment.