Skip to content

Commit

Permalink
sqlstats: include regions in statement_statistics
Browse files Browse the repository at this point in the history
Part of #89949.

This completes the work abandoned in #92259, properly storing the
regions in which a statement was executed, rather than waiting until UI
render time to try to map (dangerously ephemeral) node IDs to their
respective regions.

With this work in place, we will be able to remove both the UI mapping
code (usages of this [selector][]) and the stop-gap work of #93268.

[selector]: https://github.com/cockroachdb/cockroach/blob/0f6333cbd6c78264a59dc435324c0c33d75ea148/pkg/ui/workspaces/cluster-ui/src/store/nodes/nodes.selectors.ts#L52-L64

Release note (sql change): A regions field was added to the statistics
column of crdb_internal.statement_statistics, reporting the regions of
the nodes on which the statement was executed.
  • Loading branch information
matthewtodd committed Mar 3, 2023
1 parent 6e547d4 commit 3a4e0c8
Show file tree
Hide file tree
Showing 19 changed files with 499 additions and 154 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
/pkg/sql/execstats/ @cockroachdb/sql-observability
/pkg/sql/scheduledlogging/ @cockroachdb/sql-observability
/pkg/sql/sqlstats/ @cockroachdb/sql-observability
/pkg/ccl/testccl/sqlstatsccl/ @cockroachdb/sql-observability

/pkg/sql/sem/tree/ @cockroachdb/sql-syntax-prs
/pkg/sql/parser/ @cockroachdb/sql-syntax-prs
Expand Down
3 changes: 3 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ ALL_TESTS = [
"//pkg/ccl/telemetryccl:telemetryccl_test",
"//pkg/ccl/testccl/authccl:authccl_test",
"//pkg/ccl/testccl/sqlccl:sqlccl_test",
"//pkg/ccl/testccl/sqlstatsccl:sqlstatsccl_test",
"//pkg/ccl/testccl/workload/schemachange:schemachange_test",
"//pkg/ccl/utilccl/sampledataccl:sampledataccl_test",
"//pkg/ccl/utilccl:utilccl_test",
Expand Down Expand Up @@ -845,6 +846,7 @@ GO_TARGETS = [
"//pkg/ccl/telemetryccl:telemetryccl_test",
"//pkg/ccl/testccl/authccl:authccl_test",
"//pkg/ccl/testccl/sqlccl:sqlccl_test",
"//pkg/ccl/testccl/sqlstatsccl:sqlstatsccl_test",
"//pkg/ccl/testccl/workload/schemachange:schemachange_test",
"//pkg/ccl/testutilsccl:testutilsccl",
"//pkg/ccl/utilccl/licenseccl:licenseccl",
Expand Down Expand Up @@ -2435,6 +2437,7 @@ GET_X_DATA_TARGETS = [
"//pkg/ccl/telemetryccl:get_x_data",
"//pkg/ccl/testccl/authccl:get_x_data",
"//pkg/ccl/testccl/sqlccl:get_x_data",
"//pkg/ccl/testccl/sqlstatsccl:get_x_data",
"//pkg/ccl/testccl/workload/schemachange:get_x_data",
"//pkg/ccl/testutilsccl:get_x_data",
"//pkg/ccl/utilccl:get_x_data",
Expand Down
32 changes: 32 additions & 0 deletions pkg/ccl/testccl/sqlstatsccl/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_test")

go_test(
name = "sqlstatsccl_test",
srcs = [
"main_test.go",
"sql_stats_test.go",
],
args = ["-test.timeout=295s"],
deps = [
"//pkg/base",
"//pkg/ccl",
"//pkg/roachpb",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/settings/cluster",
"//pkg/sql",
"//pkg/sql/appstatspb",
"//pkg/testutils/serverutils",
"//pkg/testutils/skip",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/randutil",
"@com_github_stretchr_testify//require",
],
)

get_x_data(name = "get_x_data")
33 changes: 33 additions & 0 deletions pkg/ccl/testccl/sqlstatsccl/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2023 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package sqlstatsccl_test

import (
"os"
"testing"

"github.com/cockroachdb/cockroach/pkg/ccl"
"github.com/cockroachdb/cockroach/pkg/security/securityassets"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

//go:generate ../../../util/leaktest/add-leaktest.sh *_test.go

func TestMain(m *testing.M) {
defer ccl.TestingEnableEnterprise()()
securityassets.SetLoader(securitytest.EmbeddedAssets)
randutil.SeedForTests()
serverutils.InitTestServerFactory(server.TestServerFactory)
serverutils.InitTestClusterFactory(testcluster.TestClusterFactory)
os.Exit(m.Run())
}
151 changes: 151 additions & 0 deletions pkg/ccl/testccl/sqlstatsccl/sql_stats_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Copyright 2023 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package sqlstatsccl_test

import (
"context"
gosql "database/sql"
"encoding/json"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/appstatspb"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/stretchr/testify/require"
)

func TestSQLStatsRegions(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.UnderStress(t, "test is too heavy to run under stress")

// We build a small multiregion cluster, with the proper settings for
// multi-region tenants, then run tests over both the system tenant
// and a secondary tenant, ensuring that a distsql query across multiple
// regions sees those regions reported in sqlstats.
ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
sql.SecondaryTenantsMultiRegionAbstractionsEnabled.Override(ctx, &st.SV, true)
sql.SecondaryTenantZoneConfigsEnabled.Override(ctx, &st.SV, true)

numServers := 9
regionNames := []string{
"gcp-us-west1",
"gcp-us-central1",
"gcp-us-east1",
}

serverArgs := make(map[int]base.TestServerArgs)
for i := 0; i < numServers; i++ {
serverArgs[i] = base.TestServerArgs{
Settings: st,
Locality: roachpb.Locality{
Tiers: []roachpb.Tier{{Key: "region", Value: regionNames[i%len(regionNames)]}},
},
// We'll start our own test tenant manually below.
DisableDefaultTestTenant: true,
}
}

host := testcluster.StartTestCluster(t, numServers, base.TestClusterArgs{
ServerArgsPerNode: serverArgs,
})
defer host.Stopper().Stop(ctx)

testCases := []struct {
name string
db func(t *testing.T, host *testcluster.TestCluster, st *cluster.Settings) *sqlutils.SQLRunner
}{{
// This test runs against the system tenant, opening a database
// connection to the first node in the cluster.
name: "system tenant",
db: func(t *testing.T, host *testcluster.TestCluster, _ *cluster.Settings) *sqlutils.SQLRunner {
return sqlutils.MakeSQLRunner(host.ServerConn(0))
},
}, {
// This test runs against a secondary tenant, launching a SQL instance
// for each node in the underlying cluster and returning a database
// connection to the first one.
name: "secondary tenant",
db: func(t *testing.T, host *testcluster.TestCluster, st *cluster.Settings) *sqlutils.SQLRunner {
var dbs []*gosql.DB
for _, server := range host.Servers {
_, db := serverutils.StartTenant(t, server, base.TestTenantArgs{
Settings: st,
TenantID: roachpb.MustMakeTenantID(11),
Locality: *server.Locality(),
})
dbs = append(dbs, db)
}
return sqlutils.MakeSQLRunner(dbs[0])
},
}}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
db := tc.db(t, host, st)

// Create a multi-region database.
db.Exec(t, "SET enable_multiregion_placement_policy = true")
db.Exec(t, fmt.Sprintf(`CREATE DATABASE testdb PRIMARY REGION "%s" PLACEMENT RESTRICTED`, regionNames[0]))
for i := 1; i < len(regionNames); i++ {
db.Exec(t, fmt.Sprintf(`ALTER DATABASE testdb ADD region "%s"`, regionNames[i]))
}

// Make a multi-region table and split its ranges across regions.
db.Exec(t, "USE testdb")
db.Exec(t, "CREATE TABLE test (a INT) LOCALITY REGIONAL BY ROW")

// Add some data to each region.
for i, regionName := range regionNames {
db.Exec(t, "INSERT INTO test (crdb_region, a) VALUES ($1, $2)", regionName, i)
}

// Select from the table and see what statement statistics were written.
db.Exec(t, "SET application_name = $1", t.Name())
db.Exec(t, "SELECT * FROM test")
row := db.QueryRow(t, `
SELECT statistics->>'statistics'
FROM crdb_internal.statement_statistics
WHERE app_name = $1`, t.Name())

var actualJSON string
row.Scan(&actualJSON)
var actual appstatspb.StatementStatistics
err := json.Unmarshal([]byte(actualJSON), &actual)
require.NoError(t, err)

require.Equal(t,
appstatspb.StatementStatistics{
// TODO(todd): It appears we do not yet reliably record
// the nodes for the statement. (I have manually verified
// that the above query does indeed fan out across the
// regions, via EXPLAIN (DISTSQL).) Filed as #96647.
//Nodes: []int64{1, 2, 3},
//Regions: regionNames,
Nodes: []int64{1},
Regions: []string{regionNames[0]},
},
appstatspb.StatementStatistics{
Nodes: actual.Nodes,
Regions: actual.Regions,
},
)
})
}
}
1 change: 1 addition & 0 deletions pkg/sql/appstatspb/app_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) {
s.RowsRead.Add(other.RowsRead, s.Count, other.Count)
s.RowsWritten.Add(other.RowsWritten, s.Count, other.Count)
s.Nodes = util.CombineUniqueInt64(s.Nodes, other.Nodes)
s.Regions = util.CombineUniqueString(s.Regions, other.Regions)
s.PlanGists = util.CombineUniqueString(s.PlanGists, other.PlanGists)
s.IndexRecommendations = other.IndexRecommendations
s.Indexes = util.CombineUniqueString(s.Indexes, other.Indexes)
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/appstatspb/app_stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ message StatementStatistics {
// Nodes is the ordered list of nodes ids on which the statement was executed.
repeated int64 nodes = 24;

// Regions is the ordered list of regions on which the statement was executed.
repeated string regions = 29;

// PlanGists is the list of a compressed version of plan that can be converted (lossily)
// back into a logical plan.
// Each statement contain only one plan gist, but the same statement fingerprint id
Expand Down
57 changes: 55 additions & 2 deletions pkg/sql/executor_statement_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@ package sql

import (
"context"
"sort"
"strconv"
"sync"

"github.com/cockroachdb/cockroach/pkg/sql/appstatspb"
"github.com/cockroachdb/cockroach/pkg/sql/contentionpb"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/idxrecommendations"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessionphase"
"github.com/cockroachdb/cockroach/pkg/sql/sqlinstance"
"github.com/cockroachdb/cockroach/pkg/sql/sqlstats"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
Expand Down Expand Up @@ -184,6 +187,10 @@ func (ex *connExecutor) recordStatementSummary(
if err != nil {
log.Warningf(ctx, "failed to convert node ID to int: %s", err)
}

nodes := util.CombineUniqueInt64(getNodesFromPlanner(planner), []int64{nodeID})
regions := getRegionsForNodes(ctx, nodes, planner.DistSQLPlanner().sqlAddressResolver)

recordedStmtStats := sqlstats.RecordedStmtStats{
SessionID: ex.sessionID,
StatementID: stmt.QueryID,
Expand All @@ -199,7 +206,8 @@ func (ex *connExecutor) recordStatementSummary(
BytesRead: stats.bytesRead,
RowsRead: stats.rowsRead,
RowsWritten: stats.rowsWritten,
Nodes: util.CombineUniqueInt64(getNodesFromPlanner(planner), []int64{nodeID}),
Nodes: nodes,
Regions: regions,
StatementType: stmt.AST.StatementType(),
Plan: planner.instrumentation.PlanForStats(ctx),
PlanGist: planner.instrumentation.planGist.String(),
Expand Down Expand Up @@ -317,6 +325,51 @@ func getNodesFromPlanner(planner *planner) []int64 {
nodes = append(nodes, int64(i))
})
}

return nodes
}

var regionsPool = sync.Pool{
New: func() interface{} {
return make(map[string]struct{})
},
}

func getRegionsForNodes(
ctx context.Context, nodeIDs []int64, resolver sqlinstance.AddressResolver,
) []string {
if resolver == nil {
return nil
}

instances, err := resolver.GetAllInstances(ctx)
if err != nil {
return nil
}

regions := regionsPool.Get().(map[string]struct{})
defer func() {
for region := range regions {
delete(regions, region)
}
regionsPool.Put(regions)
}()

for _, instance := range instances {
for _, node := range nodeIDs {
// TODO(todd): Using int64 for nodeIDs was inappropriate, see #95088.
if int32(instance.InstanceID) == int32(node) {
if region, ok := instance.Locality.Find("region"); ok {
regions[region] = struct{}{}
}
break
}
}
}

result := make([]string, 0, len(regions))
for region := range regions {
result = append(result, region)
}
sort.Strings(result)
return result
}
Loading

0 comments on commit 3a4e0c8

Please sign in to comment.