diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index ed592c7d3c2b..92a560bf3259 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -96,6 +96,7 @@ go_library( "mixed_version_schemachange.go", "multitenant.go", "multitenant_distsql.go", + "multitenant_shared_process.go", "multitenant_tpch.go", "multitenant_upgrade.go", "multitenant_utils.go", diff --git a/pkg/cmd/roachtest/tests/cluster_to_cluster.go b/pkg/cmd/roachtest/tests/cluster_to_cluster.go index a98935ae0dae..f42aff2ac8cd 100644 --- a/pkg/cmd/roachtest/tests/cluster_to_cluster.go +++ b/pkg/cmd/roachtest/tests/cluster_to_cluster.go @@ -219,10 +219,14 @@ func setupC2C( srcClusterSettings(t, srcSQL) destClusterSettings(t, destSQL) + createTenantAdminRole(t, "src-system", srcSQL) + createTenantAdminRole(t, "dst-system", destSQL) + srcTenantID, destTenantID := 2, 2 srcTenantName := "src-tenant" destTenantName := "destination-tenant" - srcSQL.Exec(t, fmt.Sprintf(`CREATE TENANT %q`, srcTenantName)) + + createInMemoryTenant(ctx, t, c, srcTenantName, srcCluster, true) pgURL, err := copyPGCertsAndMakeURL(ctx, t, c, srcNode, srcClusterSetting.PGUrlCertsDir, addr[0]) require.NoError(t, err) @@ -241,18 +245,6 @@ func setupC2C( db: destDB, nodes: dstCluster} - // Currently, a tenant has by default a 10m RU burst limit, which can be - // reached during these tests. To prevent RU limit throttling, add 10B RUs to - // the tenant. - srcTenantInfo.sql.Exec(t, `SELECT crdb_internal.update_tenant_resource_limits($1, 10000000000, 0, -10000000000, now(), 0);`, srcTenantInfo.ID) - - createSystemRole(t, srcTenantInfo.name+" system tenant", srcTenantInfo.sql) - createSystemRole(t, srcTenantInfo.name+" system tenant", destTenantInfo.sql) - - srcTenantDB := c.Conn(ctx, t.L(), srcNode[0], option.TenantName(srcTenantName)) - srcTenantSQL := sqlutils.MakeSQLRunner(srcTenantDB) - createSystemRole(t, destTenantInfo.name+" app tenant", srcTenantSQL) return &c2cSetup{ src: srcTenantInfo, dst: destTenantInfo, @@ -260,16 +252,6 @@ func setupC2C( metrics: c2cMetrics{}} } -// createSystemRole creates a role that can be used to log into the cluster's db console -func createSystemRole(t test.Test, name string, sql *sqlutils.SQLRunner) { - username := "secure" - password := "roach" - sql.Exec(t, fmt.Sprintf(`CREATE ROLE %s WITH LOGIN PASSWORD '%s'`, username, password)) - sql.Exec(t, fmt.Sprintf(`GRANT ADMIN TO %s`, username)) - t.L().Printf(`Log into the %s db console with username "%s" and password "%s"`, - name, username, password) -} - type streamingWorkload interface { // sourceInitCmd returns a command that will populate the src cluster with data before the // replication stream begins diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go index 5bba262bfb56..97da06c5e0eb 100644 --- a/pkg/cmd/roachtest/tests/disk_stall.go +++ b/pkg/cmd/roachtest/tests/disk_stall.go @@ -15,125 +15,427 @@ import ( "fmt" "math/rand" "runtime" + "strconv" "strings" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/require" ) // registerDiskStalledDetection registers the disk 
stall test. func registerDiskStalledDetection(r registry.Registry) { - for _, affectsLogDir := range []bool{false, true} { - for _, affectsDataDir := range []bool{false, true} { + stallers := map[string]func(test.Test, cluster.Cluster) diskStaller{ + "dmsetup": func(t test.Test, c cluster.Cluster) diskStaller { return &dmsetupDiskStaller{t: t, c: c} }, + "cgroup/read-write/logs-too=false": func(t test.Test, c cluster.Cluster) diskStaller { + return &cgroupDiskStaller{t: t, c: c, readOrWrite: []string{"write", "read"}} + }, + "cgroup/read-write/logs-too=true": func(t test.Test, c cluster.Cluster) diskStaller { + return &cgroupDiskStaller{t: t, c: c, readOrWrite: []string{"write", "read"}, logsToo: true} + }, + "cgroup/write-only/logs-too=true": func(t test.Test, c cluster.Cluster) diskStaller { + return &cgroupDiskStaller{t: t, c: c, readOrWrite: []string{"write"}, logsToo: true} + }, + } + for name, makeStaller := range stallers { + name, makeStaller := name, makeStaller + r.Add(registry.TestSpec{ + Name: fmt.Sprintf("disk-stalled/%s", name), + Owner: registry.OwnerStorage, + Cluster: r.MakeClusterSpec(4), + Timeout: 20 * time.Minute, + Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { + runDiskStalledDetection(ctx, t, c, makeStaller(t, c)) + }, + // Encryption is implemented within the virtual filesystem layer, + // just like disk-health monitoring. It's important to exercise + // encryption-at-rest to ensure there is not unmonitored I/O within + // the encryption-at-rest implementation that could indefinitely + // stall the process during a disk stall. + EncryptionSupport: registry.EncryptionMetamorphic, + }) + } + + for _, stallLogDir := range []bool{false, true} { + for _, stallDataDir := range []bool{false, true} { // Grab copies of the args because we'll pass them into a closure. // Everyone's favorite bug to write in Go. 
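 			// (A note on the bug alluded to above: before Go 1.22, a loop's
 			// range variables are reused across iterations, so the Run closure
 			// below would otherwise observe only their final values.)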
-			affectsLogDir := affectsLogDir
-			affectsDataDir := affectsDataDir
+			stallLogDir := stallLogDir
+			stallDataDir := stallDataDir
 			r.Add(registry.TestSpec{
+				Skip:        "#95886",
+				SkipDetails: "The test currently only induces a 50ms disk stall, which cannot be reliably detected.",
 				Name: fmt.Sprintf(
-					"disk-stalled/log=%t,data=%t",
-					affectsLogDir, affectsDataDir,
+					"disk-stalled/fuse/log=%t,data=%t",
+					stallLogDir, stallDataDir,
 				),
 				Owner:   registry.OwnerStorage,
-				Cluster: r.MakeClusterSpec(1),
+				Cluster: r.MakeClusterSpec(4),
 				Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
-					runDiskStalledDetection(ctx, t, c, affectsLogDir, affectsDataDir)
+					runDiskStalledDetection(ctx, t, c, &fuseDiskStaller{
+						t:         t,
+						c:         c,
+						stallLogs: stallLogDir,
+						stallData: stallDataDir,
+					})
 				},
+				EncryptionSupport: registry.EncryptionMetamorphic,
 			})
 		}
 	}
 }

-func runDiskStalledDetection(
-	ctx context.Context, t test.Test, c cluster.Cluster, affectsLogDir bool, affectsDataDir bool,
-) {
-	if c.IsLocal() && runtime.GOOS != "linux" {
-		t.Fatalf("must run on linux os, found %s", runtime.GOOS)
-	}
+func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster, s diskStaller) {
+	const maxSyncDur = 10 * time.Second
+	startOpts := option.DefaultStartOpts()
+	startSettings := install.MakeClusterSettings()
+	startSettings.Env = append(startSettings.Env,
+		"COCKROACH_AUTO_BALLAST=false",
+		fmt.Sprintf("COCKROACH_LOG_MAX_SYNC_DURATION=%s", maxSyncDur),
+		fmt.Sprintf("COCKROACH_ENGINE_MAX_SYNC_DURATION_DEFAULT=%s", maxSyncDur))

-	n := c.Node(1)
+	t.Status("setting up disk staller")
+	s.Setup(ctx)
+	defer s.Cleanup(ctx)

+	t.Status("starting cluster")
 	c.Put(ctx, t.Cockroach(), "./cockroach")
-	c.Run(ctx, n, "sudo umount -f {store-dir}/faulty || true")
-	c.Run(ctx, n, "mkdir -p {store-dir}/{real,faulty} || true")
-	// Make sure the actual logs are downloaded as artifacts.
-	c.Run(ctx, n, "rm -f logs && ln -s {store-dir}/real/logs logs || true")
-
-	t.Status("setting up charybdefs")
-
-	if err := c.Install(ctx, t.L(), n, "charybdefs"); err != nil {
-		t.Fatal(err)
-	}
-	c.Run(ctx, n, "sudo charybdefs {store-dir}/faulty -oallow_other,modules=subdir,subdir={store-dir}/real")
-	c.Run(ctx, n, "sudo mkdir -p {store-dir}/real/logs")
-	c.Run(ctx, n, "sudo chmod -R 777 {store-dir}/{real,faulty}")
-
-	errCh := make(chan install.RunResultDetails)
-
-	// NB: charybdefs' delay nemesis introduces 50ms per syscall. It would
-	// be nicer to introduce a longer delay, but this works.
- tooShortSync := 40 * time.Millisecond - - maxLogSync := time.Hour - logDir := "real/logs" - if affectsLogDir { - logDir = "faulty/logs" - maxLogSync = tooShortSync - } - maxDataSync := time.Hour - dataDir := "real" - if affectsDataDir { - maxDataSync = tooShortSync - dataDir = "faulty" - } - - tStarted := timeutil.Now() - dur := 10 * time.Minute - if !affectsDataDir && !affectsLogDir { - dur = 30 * time.Second - } - - go func() { - t.WorkerStatus("running server") - result, err := c.RunWithDetailsSingleNode(ctx, t.L(), n, - fmt.Sprintf("timeout --signal 9 %ds env "+ - "COCKROACH_ENGINE_MAX_SYNC_DURATION_DEFAULT=%s "+ - "COCKROACH_LOG_MAX_SYNC_DURATION=%s "+ - "COCKROACH_AUTO_BALLAST=false "+ - "./cockroach start-single-node --insecure --store {store-dir}/%s --log '{sinks: {stderr: {filter: INFO}}, file-defaults: {dir: \"{store-dir}/%s\"}}'", - int(dur.Seconds()), maxDataSync, maxLogSync, dataDir, logDir, - ), - ) - if err != nil { - result.Err = err + c.Start(ctx, t.L(), startOpts, startSettings, c.Range(1, 3)) + + // Assert the process monotonic times are as expected. + start, ok := getProcessStartMonotonic(ctx, t, c, 1) + if !ok { + t.Fatal("unable to retrieve process start time; did Cockroach not start?") + } + if exit, exitOk := getProcessExitMonotonic(ctx, t, c, 1); exitOk { + t.Fatalf("process has an exit monotonic time of %d; did Cockroach already exit?", exit) + } + + adminUIAddrs, err := c.ExternalAdminUIAddr(ctx, t.L(), c.Nodes(2)) + require.NoError(t, err) + adminURL := adminUIAddrs[0] + + c.Run(ctx, c.Node(4), `./cockroach workload init kv --splits 1000 {pgurl:1}`) + // Open SQL connections—one to n1, the node that will be stalled, and one to + // n2 that should remain open and active for the remainder. + n1Conn := c.Conn(ctx, t.L(), 1) + defer n1Conn.Close() + n2conn := c.Conn(ctx, t.L(), 2) + defer n2conn.Close() + require.NoError(t, n1Conn.PingContext(ctx)) + _, err = n2conn.ExecContext(ctx, `USE kv;`) + require.NoError(t, err) + + // Wait for upreplication. + require.NoError(t, WaitFor3XReplication(ctx, t, n2conn)) + + t.Status("starting workload") + workloadStartAt := timeutil.Now() + m := c.NewMonitor(ctx, c.Range(1, 3)) + m.Go(func(ctx context.Context) error { + c.Run(ctx, c.Node(4), `./cockroach workload run kv --read-percent 50 `+ + `--duration 10m --concurrency 256 --max-rate 2048 --tolerate-errors `+ + ` --min-block-bytes=512 --max-block-bytes=512 `+ + `{pgurl:1-3}`) + return nil + }) + + // Wait between [3m,6m) before stalling the disk. + pauseDur := 3*time.Minute + time.Duration(rand.Intn(3))*time.Minute + pauseBeforeStall := time.After(pauseDur) + t.Status("pausing ", pauseDur, " before inducing write stall") + select { + case <-ctx.Done(): + t.Fatalf("context done before stall: %s", ctx.Err()) + case <-pauseBeforeStall: + } + + stallAt := timeutil.Now() + response := mustGetMetrics(t, adminURL, workloadStartAt, stallAt, []tsQuery{ + {name: "cr.node.txn.commits", queryType: total}, + }) + cum := response.Results[0].Datapoints + totalTxnsPreStall := cum[len(cum)-1].Value - cum[0].Value + t.L().PrintfCtx(ctx, "%.2f transactions completed before stall", totalTxnsPreStall) + + t.Status("inducing write stall") + m.ExpectDeath() + s.Stall(ctx, c.Node(1)) + defer s.Unstall(ctx, c.Node(1)) + + // Wait twice the maximum sync duration and check if our SQL connection to + // node 1 is still alive. It should've been terminated. 
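+	// (This relies on the COCKROACH_LOG_MAX_SYNC_DURATION and
+	// COCKROACH_ENGINE_MAX_SYNC_DURATION_DEFAULT variables set above: a log or
+	// engine sync exceeding maxSyncDur should fatal the stalled process,
+	// closing its SQL connections.)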
+ { + t.Status("waiting ", 2*maxSyncDur, " before checking SQL conn to n1") + select { + case <-ctx.Done(): + t.Fatal(ctx.Err()) + case <-time.After(2 * maxSyncDur): } - errCh <- result - }() + t.Status("pinging SQL connection to n1") + err := n1Conn.PingContext(ctx) + t.L().PrintfCtx(ctx, "pinging n1's connection: %s", err) + if err == nil { + t.Fatal("connection to n1 is still alive") + } + } + + // Let the workload continue after the stall. + { + workloadPauseDur := 10*time.Minute - timeutil.Since(workloadStartAt) + t.Status("letting workload continue for ", workloadPauseDur, " with n1 stalled") + select { + case <-ctx.Done(): + t.Fatal(ctx.Err()) + case <-time.After(workloadPauseDur): + } + } + + { + now := timeutil.Now() + response := mustGetMetrics(t, adminURL, workloadStartAt, now, []tsQuery{ + {name: "cr.node.txn.commits", queryType: total}, + }) + cum := response.Results[0].Datapoints + totalTxnsPostStall := cum[len(cum)-1].Value - totalTxnsPreStall + preStallTPS := totalTxnsPreStall / stallAt.Sub(workloadStartAt).Seconds() + postStallTPS := totalTxnsPostStall / now.Sub(stallAt).Seconds() + t.L().PrintfCtx(ctx, "%.2f total transactions committed after stall\n", totalTxnsPostStall) + t.L().PrintfCtx(ctx, "pre-stall tps: %.2f, post-stall tps: %.2f\n", preStallTPS, postStallTPS) + if postStallTPS < preStallTPS/2 { + t.Fatalf("post-stall TPS %.2f is less than 50%% of pre-stall TPS %.2f", postStallTPS, preStallTPS) + } + } + + { + t.Status("counting kv rows") + var rowCount int + require.NoError(t, n2conn.QueryRowContext(ctx, `SELECT count(v) FROM kv`).Scan(&rowCount)) + t.L().PrintfCtx(ctx, "Scan found %d rows.\n", rowCount) + } + + // Unstall the stalled node. It should be able to be reaped. + s.Unstall(ctx, c.Node(1)) + time.Sleep(1 * time.Second) + exit, ok := getProcessExitMonotonic(ctx, t, c, 1) + if !ok { + t.Fatalf("unable to retrieve process exit time; stall went undetected") + } + t.L().PrintfCtx(ctx, "node exited at %s after test start\n", exit-start) +} + +func getProcessStartMonotonic( + ctx context.Context, t test.Test, c cluster.Cluster, nodeID int, +) (time.Duration, bool) { + return getProcessMonotonicTimestamp(ctx, t, c, nodeID, "ActiveEnterTimestampMonotonic") +} + +func getProcessExitMonotonic( + ctx context.Context, t test.Test, c cluster.Cluster, nodeID int, +) (time.Duration, bool) { + return getProcessMonotonicTimestamp(ctx, t, c, nodeID, "ActiveExitTimestampMonotonic") +} + +func getProcessMonotonicTimestamp( + ctx context.Context, t test.Test, c cluster.Cluster, nodeID int, prop string, +) (time.Duration, bool) { + details, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(nodeID), + "systemctl show cockroach.service --property="+prop) + require.NoError(t, err) + require.NoError(t, details.Err) + parts := strings.Split(details.Stdout, "=") + if len(parts) < 2 { + return 0, false + } + s := strings.TrimSpace(parts[1]) + if s == "" { + return 0, false + } + u, err := strconv.ParseUint(s, 10, 64) + if err != nil { + t.Fatalf("unable to parse monotonic timestamp %q: %s", parts[1], err) + } + if u == 0 { + return 0, false + } + return time.Duration(u) * time.Microsecond, true +} + +type diskStaller interface { + Setup(ctx context.Context) + Cleanup(ctx context.Context) + Stall(ctx context.Context, nodes option.NodeListOption) + Unstall(ctx context.Context, nodes option.NodeListOption) + DataDir() string + LogDir() string +} + +type dmsetupDiskStaller struct { + t test.Test + c cluster.Cluster +} + +var _ diskStaller = (*dmsetupDiskStaller)(nil) + +func (s 
*dmsetupDiskStaller) device() string { return getDevice(s.t, s.c.Spec()) } + +func (s *dmsetupDiskStaller) Setup(ctx context.Context) { + dev := s.device() + s.c.Run(ctx, s.c.All(), `sudo umount /mnt/data1`) + s.c.Run(ctx, s.c.All(), `sudo dmsetup remove_all`) + s.c.Run(ctx, s.c.All(), `echo "0 $(sudo blockdev --getsz `+dev+`) linear `+dev+` 0" | `+ + `sudo dmsetup create data1`) + s.c.Run(ctx, s.c.All(), `sudo mount /dev/mapper/data1 /mnt/data1`) +} + +func (s *dmsetupDiskStaller) Cleanup(ctx context.Context) { + s.c.Run(ctx, s.c.All(), `sudo umount /mnt/data1`) + s.c.Run(ctx, s.c.All(), `sudo dmsetup remove_all`) + s.c.Run(ctx, s.c.All(), `sudo mount /mnt/data1`) +} + +func (s *dmsetupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOption) { + s.c.Run(ctx, nodes, `sudo dmsetup suspend --noflush --nolockfs data1`) +} + +func (s *dmsetupDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOption) { + s.c.Run(ctx, nodes, `sudo dmsetup resume data1`) +} - time.Sleep(time.Duration(rand.Intn(5)) * time.Second) +func (s *dmsetupDiskStaller) DataDir() string { return "{store-dir}" } +func (s *dmsetupDiskStaller) LogDir() string { return "logs" } - t.Status("blocking storage") - c.Run(ctx, n, "charybdefs-nemesis --delay") +type cgroupDiskStaller struct { + t test.Test + c cluster.Cluster + readOrWrite []string + logsToo bool +} + +var _ diskStaller = (*cgroupDiskStaller)(nil) - result := <-errCh - if result.Err == nil { - t.Fatalf("expected an error: %s", result.Stdout) +func (s *cgroupDiskStaller) DataDir() string { return "{store-dir}" } +func (s *cgroupDiskStaller) LogDir() string { + return "logs" +} +func (s *cgroupDiskStaller) Setup(ctx context.Context) { + if s.logsToo { + s.c.Run(ctx, s.c.All(), "mkdir -p {store-dir}/logs") + s.c.Run(ctx, s.c.All(), "rm -f logs && ln -s {store-dir}/logs logs || true") } +} +func (s *cgroupDiskStaller) Cleanup(ctx context.Context) {} - // This test can also run in sanity check mode to make sure it doesn't fail - // due to the aggressive env vars above. - expectMsg := affectsDataDir || affectsLogDir +func (s *cgroupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOption) { + // Shuffle the order of read and write stall initiation. + rand.Shuffle(len(s.readOrWrite), func(i, j int) { + s.readOrWrite[i], s.readOrWrite[j] = s.readOrWrite[j], s.readOrWrite[i] + }) + for _, rw := range s.readOrWrite { + s.setThroughput(ctx, nodes, rw, 1) + } +} - if expectMsg != strings.Contains(result.Stderr, "disk stall detected") { - t.Fatalf("unexpected output: %v", result.Err) - } else if elapsed := timeutil.Since(tStarted); !expectMsg && elapsed < dur { - t.Fatalf("no disk stall injected, but process terminated too early after %s (expected >= %s)", elapsed, dur) +func (s *cgroupDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOption) { + for _, rw := range s.readOrWrite { + s.setThroughput(ctx, nodes, rw, 0) } +} - c.Run(ctx, n, "charybdefs-nemesis --clear") - c.Run(ctx, n, "sudo umount {store-dir}/faulty") +func (s *cgroupDiskStaller) device() (major, minor int) { + // TODO(jackson): Programmatically determine the device major,minor numbers. 
+	// e.g.:
+	// deviceName := getDevice(s.t, s.c.Spec())
+	// `cat /proc/partitions` and find `deviceName`
+	switch s.c.Spec().Cloud {
+	case spec.GCE:
+		// ls -l /dev/nvme0n1
+		// brw-rw---- 1 root disk 259, 0 Jan 26 20:05 /dev/nvme0n1
+		return 259, 0
+	default:
+		s.t.Fatalf("unsupported cloud %q", s.c.Spec().Cloud)
+		return 0, 0
+	}
+}
+
+func (s *cgroupDiskStaller) setThroughput(
+	ctx context.Context, nodes option.NodeListOption, readOrWrite string, bytesPerSecond int,
+) {
+	major, minor := s.device()
+	s.c.Run(ctx, nodes, "sudo", "/bin/bash", "-c", fmt.Sprintf(
+		"'echo %d:%d %d > /sys/fs/cgroup/blkio/blkio.throttle.%s_bps_device'",
+		major,
+		minor,
+		bytesPerSecond,
+		readOrWrite,
+	))
+}
+
+// fuseDiskStaller uses a FUSE filesystem (charybdefs) to insert an artificial
+// delay on all I/O.
+type fuseDiskStaller struct {
+	t test.Test
+	c cluster.Cluster
+	stallLogs bool
+	stallData bool
+}
+
+var _ diskStaller = (*fuseDiskStaller)(nil)
+
+func (s *fuseDiskStaller) DataDir() string {
+	if s.stallData {
+		return "{store-dir}/faulty"
+	}
+	return "{store-dir}/real"
+}
+
+func (s *fuseDiskStaller) LogDir() string {
+	if s.stallLogs {
+		return "{store-dir}/faulty/logs"
+	}
+	return "{store-dir}/real/logs"
+}
+
+func (s *fuseDiskStaller) Setup(ctx context.Context) {
+	if s.c.IsLocal() && runtime.GOOS != "linux" {
+		s.t.Fatalf("must run on linux os, found %s", runtime.GOOS)
+	}
+	s.t.Status("setting up charybdefs")
+	require.NoError(s.t, s.c.Install(ctx, s.t.L(), s.c.All(), "charybdefs"))
+	s.c.Run(ctx, s.c.All(), "sudo umount -f {store-dir}/faulty || true")
+	s.c.Run(ctx, s.c.All(), "mkdir -p {store-dir}/{real,faulty} || true")
+	s.c.Run(ctx, s.c.All(), "rm -f logs && ln -s {store-dir}/real/logs logs || true")
+	s.c.Run(ctx, s.c.All(), "sudo charybdefs {store-dir}/faulty -oallow_other,modules=subdir,subdir={store-dir}/real")
+	s.c.Run(ctx, s.c.All(), "sudo mkdir -p {store-dir}/real/logs")
+	s.c.Run(ctx, s.c.All(), "sudo chmod -R 777 {store-dir}/{real,faulty}")
+}
+
+func (s *fuseDiskStaller) Cleanup(ctx context.Context) {
+	s.c.Run(ctx, s.c.All(), "sudo umount {store-dir}/faulty")
+}
+
+func (s *fuseDiskStaller) Stall(ctx context.Context, nodes option.NodeListOption) {
+	s.c.Run(ctx, nodes, "charybdefs-nemesis --delay")
+}
+
+func (s *fuseDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOption) {
+	s.c.Run(ctx, nodes, "charybdefs-nemesis --clear")
+}
+
+func getDevice(t test.Test, s spec.ClusterSpec) string {
+	switch s.Cloud {
+	case spec.GCE:
+		return "/dev/nvme0n1"
+	case spec.AWS:
+		return "/dev/nvme1n1"
+	default:
+		t.Fatalf("unsupported cloud %q", s.Cloud)
+		return ""
+	}
+}
diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go
index cf406ff7a423..164e63138dfd 100644
--- a/pkg/cmd/roachtest/tests/failover.go
+++ b/pkg/cmd/roachtest/tests/failover.go
@@ -634,6 +634,7 @@ func makeFailer(
 			c:             c,
 			startOpts:     opts,
 			startSettings: settings,
+			staller:       &dmsetupDiskStaller{t: t, c: c},
 		}
 	default:
 		t.Fatalf("unknown failure mode %s", failureMode)
@@ -735,27 +736,11 @@ type diskStallFailer struct {
 	m             cluster.Monitor
 	startOpts     option.StartOpts
 	startSettings install.ClusterSettings
-}
-
-func (f *diskStallFailer) device() string {
-	switch f.c.Spec().Cloud {
-	case spec.GCE:
-		return "/dev/nvme0n1"
-	case spec.AWS:
-		return "/dev/nvme1n1"
-	default:
-		f.t.Fatalf("unsupported cloud %q", f.c.Spec().Cloud)
-		return ""
-	}
+	staller       diskStaller
 }
 
 func (f *diskStallFailer) Setup(ctx context.Context) {
-	dev := f.device()
-	f.c.Run(ctx, f.c.All(), `sudo 
umount /mnt/data1`) - f.c.Run(ctx, f.c.All(), `sudo dmsetup remove_all`) - f.c.Run(ctx, f.c.All(), `echo "0 $(sudo blockdev --getsz `+dev+`) linear `+dev+` 0" | `+ - `sudo dmsetup create data1`) - f.c.Run(ctx, f.c.All(), `sudo mount /dev/mapper/data1 /mnt/data1`) + f.staller.Setup(ctx) } func (f *diskStallFailer) Ready(ctx context.Context, m cluster.Monitor) { @@ -763,23 +748,21 @@ func (f *diskStallFailer) Ready(ctx context.Context, m cluster.Monitor) { } func (f *diskStallFailer) Cleanup(ctx context.Context) { - f.c.Run(ctx, f.c.All(), `sudo dmsetup resume data1`) - // We have to stop the cluster to remount /mnt/data1. + f.staller.Unstall(ctx, f.c.All()) + // We have to stop the cluster before cleaning up the staller. f.m.ExpectDeaths(int32(f.c.Spec().NodeCount)) f.c.Stop(ctx, f.t.L(), option.DefaultStopOpts(), f.c.All()) - f.c.Run(ctx, f.c.All(), `sudo umount /mnt/data1`) - f.c.Run(ctx, f.c.All(), `sudo dmsetup remove_all`) - f.c.Run(ctx, f.c.All(), `sudo mount /mnt/data1`) + f.staller.Cleanup(ctx) } func (f *diskStallFailer) Fail(ctx context.Context, nodeID int) { // Pebble's disk stall detector should crash the node. f.m.ExpectDeath() - f.c.Run(ctx, f.c.Node(nodeID), `sudo dmsetup suspend --noflush --nolockfs data1`) + f.staller.Stall(ctx, f.c.Node(nodeID)) } func (f *diskStallFailer) Recover(ctx context.Context, nodeID int) { - f.c.Run(ctx, f.c.Node(nodeID), `sudo dmsetup resume data1`) + f.staller.Unstall(ctx, f.c.Node(nodeID)) // Pebble's disk stall detector should have terminated the node, but in case // it didn't, we explicitly stop it first. f.c.Stop(ctx, f.t.L(), option.DefaultStopOpts(), f.c.Node(nodeID)) diff --git a/pkg/cmd/roachtest/tests/multitenant_shared_process.go b/pkg/cmd/roachtest/tests/multitenant_shared_process.go new file mode 100644 index 000000000000..15bb67da19e5 --- /dev/null +++ b/pkg/cmd/roachtest/tests/multitenant_shared_process.go @@ -0,0 +1,68 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "context" + "fmt" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" +) + +func registerMultiTenantSharedProcess(r registry.Registry) { + crdbNodeCount := 4 + + r.Add(registry.TestSpec{ + Name: "multitenant/shared-process/basic", + Owner: registry.OwnerMultiTenant, + Cluster: r.MakeClusterSpec(crdbNodeCount + 1), + Timeout: 1 * time.Hour, + Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { + var ( + appTenantName = "app" + tpccWarehouses = 500 + crdbNodes = c.Range(1, crdbNodeCount) + workloadNode = c.Node(crdbNodeCount + 1) + ) + t.Status(`set up Unified Architecture Cluster`) + c.Put(ctx, t.Cockroach(), "./cockroach", crdbNodes) + c.Put(ctx, t.DeprecatedWorkload(), "./workload", workloadNode) + + // In order to observe the app tenant's db console, create a secure + // cluster and add Admin roles to the system and app tenant. 
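+			// (createTenantAdminRole and createInMemoryTenant are the helpers
+			// added to multitenant_utils.go further down in this diff.)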
+			clusterSettings := install.MakeClusterSettings(install.SecureOption(true))
+			c.Start(ctx, t.L(), option.DefaultStartOpts(), clusterSettings, crdbNodes)
+
+			sysConn := c.Conn(ctx, t.L(), crdbNodes.RandNode()[0])
+			sysSQL := sqlutils.MakeSQLRunner(sysConn)
+
+			createTenantAdminRole(t, "system", sysSQL)
+
+			createInMemoryTenant(ctx, t, c, appTenantName, crdbNodes, true)
+
+			t.Status(`initialize tpcc workload`)
+			initCmd := fmt.Sprintf(`./workload init tpcc --data-loader import --warehouses %d {pgurl%s:%s}`,
+				tpccWarehouses, crdbNodes, appTenantName)
+			c.Run(ctx, workloadNode, initCmd)
+
+			t.Status(`run tpcc workload`)
+			runCmd := fmt.Sprintf(`./workload run tpcc --warehouses %d --tolerate-errors --duration 10m {pgurl%s:%s}`,
+				tpccWarehouses, crdbNodes, appTenantName)
+			c.Run(ctx, workloadNode, runCmd)
+		},
+	})
+}
diff --git a/pkg/cmd/roachtest/tests/multitenant_utils.go b/pkg/cmd/roachtest/tests/multitenant_utils.go
index c57b102b92bc..f604b530e546 100644
--- a/pkg/cmd/roachtest/tests/multitenant_utils.go
+++ b/pkg/cmd/roachtest/tests/multitenant_utils.go
@@ -28,6 +28,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/roachprod/config"
 	"github.com/cockroachdb/cockroach/pkg/security/username"
 	"github.com/cockroachdb/cockroach/pkg/testutils"
+	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
 	"github.com/stretchr/testify/require"
 )
 
@@ -321,3 +322,57 @@ func newTenantInstance(
 	c.Run(ctx, c.Node(node), "chmod", "0600", filepath.Join("certs", key))
 	return &inst, nil
 }
+
+// createTenantAdminRole creates a role that can be used to log into a secure cluster's db console.
+func createTenantAdminRole(t test.Test, tenantName string, tenantSQL *sqlutils.SQLRunner) {
+	username := "secure"
+	password := "roach"
+	tenantSQL.Exec(t, fmt.Sprintf(`CREATE ROLE %s WITH LOGIN PASSWORD '%s'`, username, password))
+	tenantSQL.Exec(t, fmt.Sprintf(`GRANT ADMIN TO %s`, username))
+	t.L().Printf(`Log into %s db console with username "%s" and password "%s"`,
+		tenantName, username, password)
+}
+
+// createInMemoryTenant runs through the necessary steps to create an in-memory tenant without
+// resource limits.
+func createInMemoryTenant(
+	ctx context.Context,
+	t test.Test,
+	c cluster.Cluster,
+	tenantName string,
+	nodes option.NodeListOption,
+	secure bool,
+) {
+	sysSQL := sqlutils.MakeSQLRunner(c.Conn(ctx, t.L(), nodes.RandNode()[0]))
+	sysSQL.Exec(t, "CREATE TENANT $1", tenantName)
+	sysSQL.Exec(t, "ALTER TENANT $1 START SERVICE SHARED", tenantName)
+
+	// Opening a SQL session to a newly created in-process tenant may require a
+	// few retries. Unfortunately, the c.ConnE and MakeSQLRunner APIs do not make
+	// it clear if they eagerly open a session with the tenant or wait until the
+	// first query. Therefore, wrap connection opening and a ping to the tenant
+	// server in a retry loop.
+	var tenantSQL *sqlutils.SQLRunner
+	testutils.SucceedsSoon(t, func() error {
+		tenantConn, err := c.ConnE(ctx, t.L(), nodes.RandNode()[0], option.TenantName(tenantName))
+		if err != nil {
+			return err
+		}
+		if err := tenantConn.Ping(); err != nil {
+			return err
+		}
+		tenantSQL = sqlutils.MakeSQLRunner(tenantConn)
+		return nil
+	})
+
+	// Currently, a tenant has by default a 10m RU burst limit, which can be
+	// reached during these tests. To prevent RU limit throttling, add 10B RUs to
+	// the tenant.
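+	// (For reference, the builtin's arguments are believed to be: tenant ID,
+	// RUs to add, refill rate, max burst RUs, and an as-of timestamp paired
+	// with the RUs consumed as of that time.)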
+ var tenantID int + sysSQL.QueryRow(t, `SELECT id FROM [SHOW TENANT $1]`, tenantName).Scan(&tenantID) + sysSQL.Exec(t, `SELECT crdb_internal.update_tenant_resource_limits($1, 10000000000, 0, +10000000000, now(), 0);`, tenantID) + if secure { + createTenantAdminRole(t, tenantName, tenantSQL) + } +} diff --git a/pkg/cmd/roachtest/tests/registry.go b/pkg/cmd/roachtest/tests/registry.go index 7ebf3a43c6f3..29038c96c5ba 100644 --- a/pkg/cmd/roachtest/tests/registry.go +++ b/pkg/cmd/roachtest/tests/registry.go @@ -87,6 +87,7 @@ func RegisterTests(r registry.Registry) { registerMultiTenantDistSQL(r) registerMultiTenantTPCH(r) registerMultiTenantUpgrade(r) + registerMultiTenantSharedProcess(r) registerNetwork(r) registerNodeJSPostgres(r) registerPebbleWriteThroughput(r) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index f18dc6119a53..89d04f30b8a2 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -806,7 +806,101 @@ INSERT INTO f VALUES (36, '[[]]'), (37, '[{"a": [0, "b"]}, null, 1]'), (38, '[[0, 1, 2], {"b": "c"}]'), - (39, '[[0, [1, 2]]]') + (39, '[[0, [1, 2]]]'), + (40, '[[0, 1, 2]]'), + (41, '[[{"a": {"b": []}}]]') + +query T +SELECT j FROM f@i WHERE j->0 @> '[0, 1, 2, 3]' ORDER BY k +---- + +query T +SELECT j FROM f@i WHERE j->0 @> '[0]' ORDER BY k +---- +[[0, 1, 2], {"b": "c"}] +[[0, [1, 2]]] +[[0, 1, 2]] + + +query T +SELECT j FROM f@i WHERE j->0->1 @> '[1, 2, 3]' ORDER BY k +---- + +query T +SELECT j FROM f@i WHERE j->0->1 @> '[1, 2]' ORDER BY k +---- +[[0, [1, 2]]] + +query T +SELECT j FROM f@i WHERE j->0 @> '{"a": {}}' ORDER BY k +---- +[{"a": {"b": "c"}}, "d", "e"] + +query T +SELECT j FROM f@i WHERE j->0 @> '{"a": {"b": "c"}}' ORDER BY k +---- +[{"a": {"b": "c"}}, "d", "e"] + +query T +SELECT j FROM f@i WHERE j->0->1 @> '{"a": {"b": []}}' ORDER BY k +---- + +query T +SELECT j FROM f@i WHERE j->0->0 @> '{"a": {"b": []}}' ORDER BY k +---- +[[{"a": {"b": []}}]] + +query T +SELECT j FROM f@i WHERE j->'a'->0 @> '1' ORDER BY k +---- +{"a": [1, 2]} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->0->'a' @> '{"b": "c"}' ORDER BY k +---- +[{"a": {"b": "c"}}, "d", "e"] + +query T +SELECT j FROM f@i WHERE j->0 <@ '[1, 2, 3]' ORDER BY k +---- +[1, 2, {"b": "c"}] +[[]] + +query T +SELECT j FROM f@i WHERE j->1 <@ '[1, 2, 3]' ORDER BY k +---- +[1, 2, {"b": "c"}] + +query T +SELECT j FROM f@i WHERE j->0->0 <@ '[1, 2, 3]' ORDER BY k +---- + +query T +SELECT j FROM f@i WHERE j->2 <@ '["d", "e"]' ORDER BY k +---- +[{"a": {"b": "c"}}, "d", "e"] + +query T +SELECT j FROM f@i WHERE j->0 <@ '{"a": {"b": "c"}}' ORDER BY k +---- +[{"a": {"b": "c"}}, "d", "e"] + +query T +SELECT j FROM f@i WHERE j->0 <@ '["a", "b"]' ORDER BY k +---- +["a"] +[[]] + +query T +SELECT j FROM f@i WHERE j->0 <@ '"a"' ORDER BY k +---- +["a"] + +query T +SELECT j FROM f@i WHERE j->0 <@ '1' ORDER BY k +---- +[1, 2, {"b": "c"}] query T SELECT j FROM f@i WHERE j->0 = '[]' ORDER BY k @@ -873,6 +967,7 @@ SELECT j FROM f@i WHERE j->0->0 = '0' ORDER BY k ---- [[0, 1, 2], {"b": "c"}] [[0, [1, 2]]] +[[0, 1, 2]] query T SELECT j FROM f@i WHERE j->0->1 = '[1, 2]' ORDER BY k diff --git a/pkg/sql/logictest/testdata/logic_test/udf b/pkg/sql/logictest/testdata/logic_test/udf index d6af565e73f8..e540adda97a1 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf +++ b/pkg/sql/logictest/testdata/logic_test/udf @@ -2977,3 +2977,13 @@ distribution: local vectorized: true · • create function + 
+# Regression test for #96326. Strict UDFs with no arguments should not error +# while being called. +statement ok +CREATE FUNCTION f96326() RETURNS INT LANGUAGE SQL IMMUTABLE STRICT AS 'SELECT 1'; + +query I +SELECT f96326(); +---- +1 diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index ac6629c6055e..9088ad3232f8 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -1484,7 +1484,7 @@ vectorized: true right columns: (a, b_inverted_key) right fixed values: 1 column -# Inverted indices won't be used for queries of the form +# Inverted indices will be used for queries of the form # b->0 @> '{"b": "c"}' query T EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '{"b": "c"}' @@ -1497,13 +1497,19 @@ vectorized: true │ estimated row count: 111 (missing stats) │ filter: (b->0) @> '{"b": "c"}' │ -└── • scan - columns: (b) - estimated row count: 1,000 (missing stats) - table: d@d_pkey - spans: FULL SCAN +└── • index join + │ columns: (b) + │ estimated row count: 111 (missing stats) + │ table: d@d_pkey + │ key columns: a + │ + └── • scan + columns: (a) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /Arr/"b"/"c"-/Arr/"b"/"c"/PrefixEnd -# Inverted indices won't be used for queries of the form +# Inverted indices will be used for queries of the form # b->0 <@ '{"b": "c"}' query T EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '{"b": "c"}' @@ -1516,11 +1522,183 @@ vectorized: true │ estimated row count: 333 (missing stats) │ filter: (b->0) <@ '{"b": "c"}' │ -└── • scan - columns: (b) - estimated row count: 1,000 (missing stats) - table: d@d_pkey - spans: FULL SCAN +└── • index join + │ columns: (b) + │ estimated row count: 111 (missing stats) + │ table: d@d_pkey + │ key columns: a + │ + └── • project + │ columns: (a) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ estimated row count: 111 (missing stats) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /[]-/{} /Arr/{}-/Arr/{}/PrefixEnd /Arr/"b"/"c"-/Arr/"b"/"c"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '1' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (b) +│ estimated row count: 333 (missing stats) +│ filter: (b->0) <@ '1' +│ +└── • index join + │ columns: (b) + │ estimated row count: 111 (missing stats) + │ table: d@d_pkey + │ key columns: a + │ + └── • project + │ columns: (a) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ estimated row count: 111 (missing stats) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /1-/1/PrefixEnd /[]-/{} /Arr/1-/Arr/1/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '1' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (b) +│ estimated row count: 111 (missing stats) +│ filter: (b->0) @> '1' +│ +└── • index join + │ columns: (b) + │ estimated row count: 111 (missing stats) + │ table: d@d_pkey + │ key columns: a + │ + └── • project + │ columns: (a) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ estimated row count: 111 (missing stats) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing 
stats) + table: d@foo_inv + spans: /Arr/1-/Arr/1/PrefixEnd /Arr/Arr/1-/Arr/Arr/1/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '[1, 2]' +---- +distribution: local +vectorized: true +· +• project +│ columns: (b) +│ +└── • lookup join (inner) + │ columns: (a, b) + │ estimated row count: 12 (missing stats) + │ table: d@d_pkey + │ equality: (a) = (a) + │ equality cols are key + │ pred: (b->0) @> '[1, 2]' + │ + └── • project + │ columns: (a) + │ + └── • zigzag join + columns: (a, b_inverted_key, a, b_inverted_key) + estimated row count: 12 (missing stats) + left table: d@foo_inv + left columns: (a, b_inverted_key) + left fixed values: 1 column + right table: d@foo_inv + right columns: (a, b_inverted_key) + right fixed values: 1 column + +query T +EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '[1, 2]' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (b) +│ estimated row count: 333 (missing stats) +│ filter: (b->0) <@ '[1, 2]' +│ +└── • index join + │ columns: (b) + │ estimated row count: 111 (missing stats) + │ table: d@d_pkey + │ key columns: a + │ + └── • project + │ columns: (a) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ estimated row count: 111 (missing stats) + │ inverted column: b_inverted_key + │ num spans: 8 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /1-/1/PrefixEnd /2-/2/PrefixEnd /[]-/{} /Arr/1-/Arr/1/PrefixEnd /Arr/2-/Arr/2/PrefixEnd /Arr/[]-/Arr/{} /Arr/Arr/1-/Arr/Arr/1/PrefixEnd /Arr/Arr/2-/Arr/Arr/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT b FROM d WHERE (b->0 @> '[1, 2]') AND (b->1 <@ '[1]') +---- +distribution: local +vectorized: true +· +• project +│ columns: (b) +│ +└── • lookup join (inner) + │ columns: (a, b) + │ estimated row count: 4 (missing stats) + │ table: d@d_pkey + │ equality: (a) = (a) + │ equality cols are key + │ pred: ((b->0) @> '[1, 2]') AND ((b->1) <@ '[1]') + │ + └── • project + │ columns: (a) + │ + └── • zigzag join + columns: (a, b_inverted_key, a, b_inverted_key) + estimated row count: 12 (missing stats) + left table: d@foo_inv + left columns: (a, b_inverted_key) + left fixed values: 1 column + right table: d@foo_inv + right columns: (a, b_inverted_key) + right fixed values: 1 column + # Stats reflect the following, with some histogram buckets removed: # insert into d select g, '[1,2]' from generate_series(1,1000) g(g); diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index ff91d93ad594..6a17f70bec8b 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -612,14 +612,6 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition( return inverted.NonInvertedColExpression{} } - // Not using inverted indices, yet, for filters of the form - // j->0 @> '{"b": "c"}' or j->0 <@ '{"b": "c"}' - for i := range keys { - if _, ok := keys[i].(*tree.DString); !ok { - return inverted.NonInvertedColExpression{} - } - } - // Build a new JSON object with the collected keys and val. obj := buildObject(keys, val.JSON) @@ -647,6 +639,29 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition( invertedExpr = inverted.Or(invertedExpr, expr) } } + + // If a key is of the type DInt, the InvertedExpression generated is + // not tight. This is because key encodings for JSON arrays don't + // contain the respective index positions for each of their elements. 
+ // A JSON array of the form ["a", 31] will have the following encoding + // into an index key: + // + // 1/2/arr/a/pk1 + // 1/2/arr/31/pk1 + // + // where arr is an ARRAY type tag used to indicate that the next key is + // part of an array, 1 is the table id, 2 is the inverted index id and + // pk is a primary key of a row in the table. Since the array + // elements do not have their respective indices stored in + // the encoding, the original filter needs to be applied after the initial + // scan. + for i := range keys { + if _, ok := keys[i].(*tree.DInt); ok { + invertedExpr.SetNotTight() + break + } + } + return invertedExpr } diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 542b3d869ec3..dd1d3f9b4bd4 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -546,6 +546,54 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: false, remainingFilters: "j->'a'->0 = '1' OR j->'a'->1 = '1'", }, + { + filters: "j->0 @> '2'", + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->0 @> '2'", + }, + { + filters: "j->0 <@ '2'", + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->0 <@ '2'", + }, + { + filters: "j->0 @> '[1,2]'", + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->0 @> '[1,2]'", + }, + { + filters: "j->0 <@ '[1,2]'", + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->0 <@ '[1,2]'", + }, + { + filters: `j->0 <@ '{"b": "c"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: `j->0 <@ '{"b": "c"}'`, + }, + { + filters: `j->0 @> '{"b": "c"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: `j->0 @> '{"b": "c"}'`, + }, { // The inner most expression is not a fetch val expression with an // indexed column on the left. diff --git a/pkg/sql/opt/optbuilder/scalar.go b/pkg/sql/opt/optbuilder/scalar.go index a11d59aea4a7..7816cfa430b7 100644 --- a/pkg/sql/opt/optbuilder/scalar.go +++ b/pkg/sql/opt/optbuilder/scalar.go @@ -744,7 +744,7 @@ func (b *Builder) buildUDF( // // CASE WHEN arg1 IS NULL OR arg2 IS NULL OR ... THEN NULL ELSE udf() END // - if !o.CalledOnNullInput { + if !o.CalledOnNullInput && len(args) > 0 { var anyArgIsNull opt.ScalarExpr for i := range args { // Note: We do NOT use a TupleIsNullExpr here if the argument is a diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 5973ca5405fe..e0e529244c26 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2897,6 +2897,57 @@ project └── filters └── (j:4->0) = '{"a": "b"}' [outer=(4), immutable] +# Generate an inverted scan when right side of the equality is an array. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' = '["b"]' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null + │ ├── inverted constraint: /7/1 + │ │ └── spans: ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ └── key: (1) + └── filters + └── (j:4->'a') = '["b"]' [outer=(4), immutable] + +# Generate an inverted scan when right side of the equality is an object. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' = '{"b": "c"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null + │ ├── inverted constraint: /7/1 + │ │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── key: (1) + └── filters + └── (j:4->'a') = '{"b": "c"}' [outer=(4), immutable] + + # Generate an inverted scan when the index of the fetch val operator is # an integer and the right side to the equality is a JSON # array. Since the inverted expression is not tight, a @@ -2924,9 +2975,9 @@ project └── filters └── (j:4->0) = '[1, 2]' [outer=(4), immutable] -# Do not generate an inverted scan when the index of the fetch val operator -# is not a string and there is a containment operator. -opt expect-not=GenerateInvertedIndexScans +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON object. +opt expect=GenerateInvertedIndexScans SELECT k FROM b WHERE j->0 @> '{"b": "c"}' ---- project @@ -2938,16 +2989,21 @@ project ├── immutable ├── key: (1) ├── fd: (1)-->(4) - ├── scan b + ├── index-join b │ ├── columns: k:1!null j:4 │ ├── key: (1) - │ └── fd: (1)-->(4) + │ ├── fd: (1)-->(4) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null + │ ├── inverted constraint: /7/1 + │ │ └── spans: ["7\x00\x03b\x00\x01\x12c\x00\x01", "7\x00\x03b\x00\x01\x12c\x00\x01"] + │ └── key: (1) └── filters └── (j:4->0) @> '{"b": "c"}' [outer=(4), immutable] -# Do not generate an inverted scan when the index of the fetch val operator -# is not a string and there is a containment operator. -opt expect-not=GenerateInvertedIndexScans +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON object. 
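+# Contained-by (<@) expressions are never tight: the union spans below also
+# appear to cover empty-container encodings, so the original filter is
+# re-applied on top of the inverted scan.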
+opt expect=GenerateInvertedIndexScans SELECT k FROM b WHERE j->0 <@ '{"b": "c"}' ---- project @@ -2959,17 +3015,110 @@ project ├── immutable ├── key: (1) ├── fd: (1)-->(4) - ├── scan b + ├── index-join b │ ├── columns: k:1!null j:4 │ ├── key: (1) - │ └── fd: (1)-->(4) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"] + │ │ └── ["7\x00\x03b\x00\x01\x12c\x00\x01", "7\x00\x03b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"] + │ │ └── ["7\x00\x03b\x00\x01\x12c\x00\x01", "7\x00\x03b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(7) └── filters └── (j:4->0) <@ '{"b": "c"}' [outer=(4), immutable] +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON array. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->0 <@ '[1, 2]' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x01*\x02\x00", "7\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x01*\x04\x00", "7\x00\x01*\x04\x00"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"] + │ │ ├── ["7\x00\x03\x00\x018", "7\x00\x03\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x03\x00\x01*\x04\x00"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x01*\x02\x00", "7\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x01*\x04\x00", "7\x00\x01*\x04\x00"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"] + │ │ ├── ["7\x00\x03\x00\x018", "7\x00\x03\x00\x018"] + │ │ ├── ["7\x00\x03\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x03\x00\x01*\x04\x00"] + │ ├── key: (1) + │ └── fd: (1)-->(7) + └── filters + └── (j:4->0) <@ '[1, 2]' [outer=(4), immutable] -# Generate an inverted scan when right side of the equality is an array. +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON array. 
opt expect=GenerateInvertedIndexScans -SELECT k FROM b WHERE j->'a' = '["b"]' +SELECT k FROM b WHERE j->0 @> '[1, 2]' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inner-join (lookup b) + ├── columns: k:1!null j:4 + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── inner-join (zigzag b@j_inv_idx b@j_inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [7] = ['\x370003000300012a0200'] + │ ├── right fixed columns: [7] = ['\x370003000300012a0400'] + │ └── filters (true) + └── filters + └── (j:4->0) @> '[1, 2]' [outer=(4), immutable] + + +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON string. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->0 @> '"a"' ---- project ├── columns: k:1!null @@ -2984,17 +3133,29 @@ project │ ├── columns: k:1!null j:4 │ ├── key: (1) │ ├── fd: (1)-->(4) - │ └── scan b@j_inv_idx + │ └── inverted-filter │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] - │ └── key: (1) + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x01\x12a\x00\x01"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x03\x00\x01\x12a\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x01\x12a\x00\x01"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x03\x00\x01\x12a\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(7) └── filters - └── (j:4->'a') = '["b"]' [outer=(4), immutable] + └── (j:4->0) @> '"a"' [outer=(4), immutable] -# Generate an inverted scan when right side of the equality is an object. +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON string. opt expect=GenerateInvertedIndexScans -SELECT k FROM b WHERE j->'a' = '{"b": "c"}' +SELECT k FROM b WHERE j->0 <@ '"a"' ---- project ├── columns: k:1!null @@ -3009,13 +3170,102 @@ project │ ├── columns: k:1!null j:4 │ ├── key: (1) │ ├── fd: (1)-->(4) - │ └── scan b@j_inv_idx + │ └── inverted-filter │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] - │ └── key: (1) + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x01\x12a\x00\x01", "7\x00\x01\x12a\x00\x01"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ └── ["7\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x01\x12a\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x01\x12a\x00\x01", "7\x00\x01\x12a\x00\x01"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ └── ["7\x00\x03\x00\x01\x12a\x00\x01", "7\x00\x03\x00\x01\x12a\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(7) └── filters - └── (j:4->'a') = '{"b": "c"}' [outer=(4), immutable] + └── (j:4->0) <@ '"a"' [outer=(4), immutable] + +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON integer. 
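+# The two spans below appear to cover '1' both as a direct array element and
+# nested one array level deeper, since j->0 @> '1' also matches rows where
+# the first element is itself an array containing 1.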
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->0 @> '1' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x03\x00\x01*\x02\x00"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7\x00\x03\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x03\x00\x01*\x02\x00"] + │ ├── key: (1) + │ └── fd: (1)-->(7) + └── filters + └── (j:4->0) @> '1' [outer=(4), immutable] + +# Generate an inverted scan when the index of the fetch val operator +# is an integer and there is a containment operator with a JSON integer. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->0 <@ '1' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /7 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x01*\x02\x00", "7\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ └── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:7!null + │ ├── inverted constraint: /7/1 + │ │ └── spans + │ │ ├── ["7\x00\x01*\x02\x00", "7\x00\x01*\x02\x00"] + │ │ ├── ["7\x00\x018", "7\x00\x018"] + │ │ └── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] + │ ├── key: (1) + │ └── fd: (1)-->(7) + └── filters + └── (j:4->0) <@ '1' [outer=(4), immutable] # Query using the fetch val and equality operators in a conjunction. opt expect=GenerateInvertedIndexScans disable=GenerateInvertedIndexZigzagJoins diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 0fe94c9a6921..3167f884b9ca 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -6429,6 +6429,9 @@ Parameters:` + randgencfg.ConfigDoc, ), // Used to configure the tenant token bucket. See UpdateTenantResourceLimits. + // + // TODO(multitenantTeam): use tenantName instead of tenantID. See issue: + // https://github.com/cockroachdb/cockroach/issues/96176 "crdb_internal.update_tenant_resource_limits": makeBuiltin( tree.FunctionProperties{ Category: builtinconstants.CategoryMultiTenancy,