From 4dbbafa8f1785f5cad90947efff670b914bca4b2 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Tue, 22 Nov 2022 10:06:14 +0100
Subject: [PATCH 1/9] roachtest: fix transfer-leases

`checkNoLeases` verifies that once a node is drained, for each range
there is one replica on the other nodes that sees the lease on one of
the non-drained stores. The reason it asks for just *one* replica to
have the lease, as opposed to all of them, is that some followers may
be behind. However, even the assumption that there is one can be
violated.

The drain succeeds once the draining node has locally seen the lease
transfer succeed. It is likely the raft leader at this point, so it
will be the first one to see this event. Other nodes will only see it
after one additional round-trip (when they learn that the log index
has committed, and then go and apply it). So going and looking for a
replica that sees the new lease immediately after the drain succeeds
may fail.

Work around this by sleeping for one second before checking, which
ought to be enough, and is also a small enough delay to make sure that
if leases are actually not getting transferred, the check will
continue to fail (non-cooperative lease failover is on the order of
multiple seconds).

This commit also improves a debugging file that was previously
clobbered over the multiple iterations of the surrounding loop. It
also makes it clearer that we're pulling the range data from each node
in turn: we were previously hitting otherNodeID and asking it to proxy
to each node; now we hit each node directly, without relying on the
internal redirect, which is less confusing.

See: https://github.com/cockroachdb/cockroach/issues/91801#issuecomment-1323331606

Fixes #91801.

Epic: none
Release note: None
---
 pkg/cmd/roachtest/tests/quit.go | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/quit.go b/pkg/cmd/roachtest/tests/quit.go
index ed82b57ca69a..9edf2acb9a60 100644
--- a/pkg/cmd/roachtest/tests/quit.go
+++ b/pkg/cmd/roachtest/tests/quit.go
@@ -202,10 +202,6 @@ ALTER TABLE t SPLIT AT TABLE generate_series(%[1]d,%[1]d-99,-1)`, i)); err != ni
 // checkNoLeases verifies that no range has a lease on the node
 // that's just been shut down.
 func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) {
-	// We need to use SQL against a node that's not the one we're
-	// shutting down.
-	otherNodeID := 1 + nodeID%q.c.Spec().NodeCount
-
 	// Now we're going to check two things:
 	//
 	// 1) *immediately*, that every range in the cluster has a lease
@@ -218,12 +214,21 @@ func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) {
 	// drain does not wait for followers to catch up.
 	// https://github.com/cockroachdb/cockroach/issues/47100
 	//
+	// Additionally, the way the test is architected right now has a tiny race:
+	// when n3 has transferred the lease, the result is visible to n3, but we
+	// are only checking the other nodes. Even if some of them must have acked
+	// the raft log entry, there is an additional delay until they apply it. So
+	// we may still, in this test, find that a node has drained and there is a
+	// lease transfer that is not yet visible (= has applied) on any other
+	// node. To work around this, we sleep for one second prior to checking.
+	//
 	// 2) *eventually* that every other node than nodeID has no range
 	// replica whose lease refers to nodeID, i.e. the followers
 	// have all caught up.
 	// Note: when issue #47100 is fixed, this 2nd condition
 	// must be true immediately -- drain is then able to wait
 	// for all followers to learn who the new leaseholder is.
+	time.Sleep(time.Second)
+
 	if err := testutils.SucceedsSoonError(func() error {
 		// To achieve that, we ask first each range in turn for its range
@@ -248,18 +253,18 @@ func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) {
 			// Get the report via HTTP.
 			// Flag -s is to remove progress on stderr, so that the buffer
 			// contains the JSON of the response and nothing else.
-			adminAddrs, err := q.c.InternalAdminUIAddr(ctx, q.t.L(), q.c.Node(otherNodeID))
+			adminAddrs, err := q.c.InternalAdminUIAddr(ctx, q.t.L(), q.c.Node(i))
 			if err != nil {
 				q.Fatal(err)
 			}
-			result, err := q.c.RunWithDetailsSingleNode(ctx, q.t.L(), q.c.Node(otherNodeID),
-				"curl", "-s", fmt.Sprintf("http://%s/_status/ranges/%d",
-					adminAddrs[0], i))
+			result, err := q.c.RunWithDetailsSingleNode(ctx, q.t.L(), q.c.Node(i),
+				"curl", "-s", fmt.Sprintf("http://%s/_status/ranges/local",
+					adminAddrs[0]))
 			if err != nil {
 				q.Fatal(err)
 			}
 			// Persist the response to artifacts to aid debugging. See #75438.
-			_ = os.WriteFile(filepath.Join(q.t.ArtifactsDir(), "status_ranges.json"),
+			_ = os.WriteFile(filepath.Join(q.t.ArtifactsDir(), fmt.Sprintf("status_ranges_n%d.json", i)),
 				[]byte(result.Stdout), 0644,
 			)
 			// We need just a subset of the response. Make an ad-hoc
@@ -342,10 +347,11 @@ func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) {
 		q.Fatal(err)
 	}
 
+	// For good measure, also write to the table. This ensures it remains
+	// available. We pick a node that's not the drained node.
+	otherNodeID := 1 + nodeID%q.c.Spec().NodeCount
 	db := q.c.Conn(ctx, q.t.L(), otherNodeID)
 	defer db.Close()
-	// For good measure, also write to the table. This ensures it
-	// remains available.
 	if _, err := db.ExecContext(ctx, `UPDATE t SET y = y + 1`); err != nil {
 		q.Fatal(err)
 	}

From 00c73302968dd61f82b582849eb4f676d397183d Mon Sep 17 00:00:00 2001
From: Andrei Matei
Date: Mon, 5 Dec 2022 11:13:00 -0500
Subject: [PATCH 2/9] base: remove old testing knobs

The testing knobs for startup migrations don't exist anymore.

Release note: None
Epic: None
---
 pkg/base/testing_knobs.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pkg/base/testing_knobs.go b/pkg/base/testing_knobs.go
index ac792978a9f4..ec3fc5dd9cef 100644
--- a/pkg/base/testing_knobs.go
+++ b/pkg/base/testing_knobs.go
@@ -29,7 +29,6 @@ type TestingKnobs struct {
 	SQLTypeSchemaChanger         ModuleTestingKnobs
 	GCJob                        ModuleTestingKnobs
 	PGWireTestingKnobs           ModuleTestingKnobs
-	StartupMigrationManager      ModuleTestingKnobs
 	DistSQL                      ModuleTestingKnobs
 	SQLEvalContext               ModuleTestingKnobs
 	NodeLiveness                 ModuleTestingKnobs

From b849f61b025eaa4a4bb1dff1194ca3fc7da16df3 Mon Sep 17 00:00:00 2001
From: Andrei Matei
Date: Mon, 5 Dec 2022 15:47:44 -0500
Subject: [PATCH 3/9] testutils: add missing comment

Release note: None
Epic: None
---
 pkg/testutils/serverutils/test_tenant_shim.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/testutils/serverutils/test_tenant_shim.go b/pkg/testutils/serverutils/test_tenant_shim.go
index 31d995360b92..c7a755837ccf 100644
--- a/pkg/testutils/serverutils/test_tenant_shim.go
+++ b/pkg/testutils/serverutils/test_tenant_shim.go
@@ -161,7 +161,7 @@ type TestTenantInterface interface {
 	// as an interface{}.
 	RangeDescIteratorFactory() interface{}
 
-	// !!!
+	// Tracer returns a reference to the tenant's Tracer.
Tracer() *tracing.Tracer // TODO(irfansharif): We'd benefit from an API to construct a *gosql.DB, or From 33ead8e62a39b3fa68c4a64c0eb504cf78b4df53 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Thu, 1 Dec 2022 11:34:02 -0500 Subject: [PATCH 4/9] tree: improve type-checking for placeholders with ambiguous type The key fix is to change the typeCheckSplitExprs function so that it marks _all_ placeholder indexes. This then causes the existing type-checking logic in typeCheckOverloadedExprs to check all placeholder expressions, rather than just ones that don't have type hints. Release note (bug fix): Prepared statements that use type hints can now succeed type-checking in more cases when the placeholder type is ambiguous. --- .../pgwire/testdata/pgtest/collated_string | 73 +++++++++++++++++++ pkg/sql/sem/tree/constant.go | 5 ++ pkg/sql/sem/tree/overload.go | 30 +++++--- pkg/sql/sem/tree/placeholders.go | 7 +- pkg/sql/sem/tree/type_check.go | 41 ++++++++--- pkg/sql/sem/tree/type_check_internal_test.go | 3 +- pkg/sql/sem/tree/type_check_test.go | 28 +++++++ 7 files changed, 167 insertions(+), 20 deletions(-) create mode 100644 pkg/sql/pgwire/testdata/pgtest/collated_string diff --git a/pkg/sql/pgwire/testdata/pgtest/collated_string b/pkg/sql/pgwire/testdata/pgtest/collated_string new file mode 100644 index 000000000000..42fd3babae0b --- /dev/null +++ b/pkg/sql/pgwire/testdata/pgtest/collated_string @@ -0,0 +1,73 @@ +send +Query {"String": "DROP TABLE IF EXISTS collated_string_table"} +Query {"String": "CREATE TABLE collated_string_table (id UUID PRIMARY KEY, email TEXT COLLATE \"en-US-u-ks-level2\" NOT NULL)"} +Query {"String": "CREATE UNIQUE INDEX ON collated_string_table(email)"} +---- + +until ignore=NoticeResponse +ReadyForQuery +ReadyForQuery +ReadyForQuery +---- +{"Type":"CommandComplete","CommandTag":"DROP TABLE"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CommandComplete","CommandTag":"CREATE TABLE"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CommandComplete","CommandTag":"CREATE INDEX"} +{"Type":"ReadyForQuery","TxStatus":"I"} + +send +Parse {"Query": "INSERT INTO collated_string_table (email,id) VALUES ($1,$2)", "Name": "insert_0"} +Describe {"Name": "insert_0", "ObjectType": "S"} +Bind {"ParameterFormatCodes": [1,1], "PreparedStatement": "insert_0", "Parameters": [{"binary":"757365722d31406578616d706c652e636f6d"}, {"binary":"00e8febeba494ee0ae269550e08cae0f"}]} +Execute +Sync +---- + +until +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"ParameterDescription","ParameterOIDs":[25,2950]} +{"Type":"NoData"} +{"Type":"BindComplete"} +{"Type":"CommandComplete","CommandTag":"INSERT 0 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} + +# Check without sending ParameterOIDs. 
+send +Parse {"Query": "SELECT u0.id, u0.email FROM collated_string_table AS u0 WHERE (u0.email = $1)", "Name": "select_0"} +Describe {"Name": "select_0", "ObjectType": "S"} +Bind {"ParameterFormatCodes": [1], "PreparedStatement": "select_0", "Parameters": [{"binary":"555345522d32406578616d706c652e636f6d"}]} +Execute +Sync +---- + +until ignore_table_oids +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"ParameterDescription","ParameterOIDs":[25]} +{"Type":"RowDescription","Fields":[{"Name":"id","TableOID":0,"TableAttributeNumber":1,"DataTypeOID":2950,"DataTypeSize":16,"TypeModifier":-1,"Format":0},{"Name":"email","TableOID":0,"TableAttributeNumber":2,"DataTypeOID":25,"DataTypeSize":-1,"TypeModifier":-1,"Format":0}]} +{"Type":"BindComplete"} +{"Type":"CommandComplete","CommandTag":"SELECT 0"} +{"Type":"ReadyForQuery","TxStatus":"I"} + +# Check with sending ParameterOIDs. +send +Parse {"Query": "SELECT u0.id, u0.email FROM collated_string_table AS u0 WHERE (u0.email = $1)", "Name": "select_1", "ParameterOIDs": [25]} +Describe {"Name": "select_1", "ObjectType": "S"} +Bind {"ParameterFormatCodes": [1], "PreparedStatement": "select_1", "Parameters": [{"binary":"555345522d32406578616d706c652e636f6d"}]} +Execute +Sync +---- + +until ignore_table_oids +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"ParameterDescription","ParameterOIDs":[25]} +{"Type":"RowDescription","Fields":[{"Name":"id","TableOID":0,"TableAttributeNumber":1,"DataTypeOID":2950,"DataTypeSize":16,"TypeModifier":-1,"Format":0},{"Name":"email","TableOID":0,"TableAttributeNumber":2,"DataTypeOID":25,"DataTypeSize":-1,"TypeModifier":-1,"Format":0}]} +{"Type":"BindComplete"} +{"Type":"CommandComplete","CommandTag":"SELECT 0"} +{"Type":"ReadyForQuery","TxStatus":"I"} diff --git a/pkg/sql/sem/tree/constant.go b/pkg/sql/sem/tree/constant.go index 2e8b4afadc73..c6ae56790b0a 100644 --- a/pkg/sql/sem/tree/constant.go +++ b/pkg/sql/sem/tree/constant.go @@ -64,6 +64,11 @@ func isConstant(expr Expr) bool { return ok } +func isPlaceholder(expr Expr) bool { + _, isPlaceholder := StripParens(expr).(*Placeholder) + return isPlaceholder +} + func typeCheckConstant( ctx context.Context, semaCtx *SemaContext, c Constant, desired *types.T, ) (ret TypedExpr, err error) { diff --git a/pkg/sql/sem/tree/overload.go b/pkg/sql/sem/tree/overload.go index 4e592c4555e8..d8a5361184b5 100644 --- a/pkg/sql/sem/tree/overload.go +++ b/pkg/sql/sem/tree/overload.go @@ -679,9 +679,7 @@ func (s *overloadTypeChecker) typeCheckOverloadedExprs( } else { s.typedExprs = make([]TypedExpr, len(s.exprs)) } - s.constIdxs, s.placeholderIdxs, s.resolvableIdxs = typeCheckSplitExprs( - semaCtx, s.exprs, - ) + s.constIdxs, s.placeholderIdxs, s.resolvableIdxs = typeCheckSplitExprs(s.exprs) // If no overloads are provided, just type check parameters and return. if numOverloads == 0 { @@ -726,8 +724,18 @@ func (s *overloadTypeChecker) typeCheckOverloadedExprs( // out impossible candidates based on identical parameters. For instance, // f(int, float) is not a possible candidate for the expression f($1, $1). - // Filter out overloads on resolved types. + // Filter out overloads on resolved types. This includes resolved placeholders + // and any other resolvable exprs. 
+	var typeableIdxs = util.FastIntSet{}
 	for i, ok := s.resolvableIdxs.Next(0); ok; i, ok = s.resolvableIdxs.Next(i + 1) {
+		typeableIdxs.Add(i)
+	}
+	for i, ok := s.placeholderIdxs.Next(0); ok; i, ok = s.placeholderIdxs.Next(i + 1) {
+		if !semaCtx.isUnresolvedPlaceholder(s.exprs[i]) {
+			typeableIdxs.Add(i)
+		}
+	}
+	for i, ok := typeableIdxs.Next(0); ok; i, ok = typeableIdxs.Next(i + 1) {
 		paramDesired := types.Any
 
 		// If all remaining candidates require the same type for this parameter,
@@ -789,10 +797,10 @@ func (s *overloadTypeChecker) typeCheckOverloadedExprs(
 	}
 
 	var homogeneousTyp *types.T
-	if !s.resolvableIdxs.Empty() {
-		idx, _ := s.resolvableIdxs.Next(0)
+	if !typeableIdxs.Empty() {
+		idx, _ := typeableIdxs.Next(0)
 		homogeneousTyp = s.typedExprs[idx].ResolvedType()
-		for i, ok := s.resolvableIdxs.Next(idx); ok; i, ok = s.resolvableIdxs.Next(i + 1) {
+		for i, ok := typeableIdxs.Next(idx); ok; i, ok = typeableIdxs.Next(i + 1) {
 			if !homogeneousTyp.Equivalent(s.typedExprs[i].ResolvedType()) {
 				homogeneousTyp = nil
 				break
@@ -1196,8 +1204,9 @@ func defaultTypeCheck(
 	}
 	for i, ok := s.placeholderIdxs.Next(0); ok; i, ok = s.placeholderIdxs.Next(i + 1) {
 		if errorOnPlaceholders {
-			_, err := s.exprs[i].TypeCheck(ctx, semaCtx, types.Any)
-			return err
+			if _, err := s.exprs[i].TypeCheck(ctx, semaCtx, types.Any); err != nil {
+				return err
+			}
 		}
 		// If we don't want to error on args, avoid type checking them without a desired type.
 		s.typedExprs[i] = StripParens(s.exprs[i]).(*Placeholder)
@@ -1268,6 +1277,9 @@ func checkReturnPlaceholdersAtIdx(
 			}
 			return false, err
 		}
+		if typ.ResolvedType().IsAmbiguous() {
+			return false, nil
+		}
 		s.typedExprs[i] = typ
 	}
 	s.overloadIdxs = append(s.overloadIdxs[:0], idx)
diff --git a/pkg/sql/sem/tree/placeholders.go b/pkg/sql/sem/tree/placeholders.go
index 27f216ed707a..5ca26ebf5069 100644
--- a/pkg/sql/sem/tree/placeholders.go
+++ b/pkg/sql/sem/tree/placeholders.go
@@ -134,7 +134,12 @@ func (p *PlaceholderTypesInfo) SetType(idx PlaceholderIdx, typ *types.T) error {
 				pgcode.DatatypeMismatch,
 				"placeholder %s already has type %s, cannot assign %s", idx, t, typ)
 		}
-		return nil
+		// If `t` is not ambiguous or if `typ` is ambiguous, then we shouldn't
+		// change the type that's already set. Otherwise, we can use `typ` since
+		// it is more specific.
+		if !t.IsAmbiguous() || typ.IsAmbiguous() {
+			return nil
+		}
 	}
 	p.Types[idx] = typ
 	return nil
diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go
index 8ab4c43ec85f..c36cfb82b4a8 100644
--- a/pkg/sql/sem/tree/type_check.go
+++ b/pkg/sql/sem/tree/type_check.go
@@ -1696,10 +1696,19 @@ func (expr *Placeholder) TypeCheck(
 		return expr, err
 	} else if ok {
 		typ = typ.WithoutTypeModifiers()
-		if !desired.Equivalent(typ) {
-			// This indicates there's a conflict between what the type system thinks
-			// the type for this position should be, and the actual type of the
-			// placeholder. This actual placeholder type could be either a type hint
+		if !desired.Equivalent(typ) || (typ.IsAmbiguous() && !desired.IsAmbiguous()) {
+			// This indicates either:
+			// - There's a conflict between what the type system thinks
+			// the type for this position should be, and the actual type of the
+			// placeholder.
+			// - A type was already set for the placeholder, but it was ambiguous. If
+			// the desired type is not ambiguous then it can be used as the
+			// placeholder type.
This can happen during overload type checking: an + // overload that operates on collated strings might cause the type + // checker to assign AnyCollatedString to a placeholder, but a later + // stage of type checking can further refine the desired type. + // + // This actual placeholder type could be either a type hint // (from pgwire or from a SQL PREPARE), or the actual value type. // // To resolve this situation, we *override* the placeholder type with what @@ -2372,7 +2381,7 @@ func typeCheckSameTypedExprs( // TODO(nvanbenschoten): Look into reducing allocations here. typedExprs := make([]TypedExpr, len(exprs)) - constIdxs, placeholderIdxs, resolvableIdxs := typeCheckSplitExprs(semaCtx, exprs) + constIdxs, placeholderIdxs, resolvableIdxs := typeCheckSplitExprs(exprs) s := typeCheckExprsState{ ctx: ctx, @@ -2530,11 +2539,25 @@ func typeCheckSameTypedConsts( return nil, errors.AssertionFailedf("should throw error above") } -// Used to type check all constants with the optional desired type. The -// type that is chosen here will then be set to any placeholders. +// Used to type check all constants with the optional desired type. First, +// placeholders with type hints are checked, then constants are checked to +// match the resulting type. The type that is chosen here will then be set +// to any unresolved placeholders. func typeCheckConstsAndPlaceholdersWithDesired( s typeCheckExprsState, desired *types.T, ) ([]TypedExpr, *types.T, error) { + if !s.placeholderIdxs.Empty() { + for i, ok := s.placeholderIdxs.Next(0); ok; i, ok = s.placeholderIdxs.Next(i + 1) { + if !s.semaCtx.isUnresolvedPlaceholder(s.exprs[i]) { + typedExpr, err := typeCheckAndRequire(s.ctx, s.semaCtx, s.exprs[i], desired, "placeholder") + if err != nil { + return nil, nil, err + } + s.typedExprs[i] = typedExpr + desired = typedExpr.ResolvedType() + } + } + } typ, err := typeCheckSameTypedConsts(s, desired, false) if err != nil { return nil, nil, err @@ -2552,13 +2575,13 @@ func typeCheckConstsAndPlaceholdersWithDesired( // - Placeholders // - All other Exprs func typeCheckSplitExprs( - semaCtx *SemaContext, exprs []Expr, + exprs []Expr, ) (constIdxs util.FastIntSet, placeholderIdxs util.FastIntSet, resolvableIdxs util.FastIntSet) { for i, expr := range exprs { switch { case isConstant(expr): constIdxs.Add(i) - case semaCtx.isUnresolvedPlaceholder(expr): + case isPlaceholder(expr): placeholderIdxs.Add(i) default: resolvableIdxs.Add(i) diff --git a/pkg/sql/sem/tree/type_check_internal_test.go b/pkg/sql/sem/tree/type_check_internal_test.go index 01bbf80b9d13..c24a9afacfa2 100644 --- a/pkg/sql/sem/tree/type_check_internal_test.go +++ b/pkg/sql/sem/tree/type_check_internal_test.go @@ -314,6 +314,7 @@ func TestTypeCheckSameTypedExprsError(t *testing.T) { tupleIntMismatchErr := `expected .* to be of type (tuple|int), found type (tuple|int)` tupleLenErr := `expected tuple .* to have a length of .*` placeholderErr := `could not determine data type of placeholder .*` + placeholderAlreadyAssignedErr := `placeholder .* already has type (decimal|int), cannot assign (decimal|int)` testData := []struct { ptypes tree.PlaceholderTypes @@ -325,7 +326,7 @@ func TestTypeCheckSameTypedExprsError(t *testing.T) { // Single type mismatches. 
{nil, nil, exprs(dint(1), decConst("1.1")), decimalIntMismatchErr}, {nil, nil, exprs(dint(1), ddecimal(1)), decimalIntMismatchErr}, - {ptypesInt, nil, exprs(decConst("1.1"), placeholder(0)), decimalIntMismatchErr}, + {ptypesInt, nil, exprs(decConst("1.1"), placeholder(0)), placeholderAlreadyAssignedErr}, // Tuple type mismatches. {nil, nil, exprs(tuple(dint(1)), tuple(ddecimal(1))), tupleFloatIntMismatchErr}, {nil, nil, exprs(tuple(dint(1)), dint(1), dint(1)), tupleIntMismatchErr}, diff --git a/pkg/sql/sem/tree/type_check_test.go b/pkg/sql/sem/tree/type_check_test.go index 844ce46e4240..78b765328e07 100644 --- a/pkg/sql/sem/tree/type_check_test.go +++ b/pkg/sql/sem/tree/type_check_test.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/stretchr/testify/require" ) // The following tests need both the type checking infrastructure and also @@ -437,3 +438,30 @@ func TestTypeCheckVolatility(t *testing.T) { } } } + +func TestTypeCheckCollatedString(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + // Typecheck without any restrictions. + semaCtx := tree.MakeSemaContext() + semaCtx.Properties.Require("", 0 /* flags */) + + // Hint a normal string type for $1. + placeholderTypes := []*types.T{types.String} + err := semaCtx.Placeholders.Init(len(placeholderTypes), placeholderTypes) + require.NoError(t, err) + + // The collated string constant must be on the LHS for this test, so that + // the type-checker chooses the collated string overload first. + expr, err := parser.ParseExpr("'cat'::STRING COLLATE \"en-US-u-ks-level2\" = ($1)") + require.NoError(t, err) + typed, err := tree.TypeCheck(ctx, expr, &semaCtx, types.Any) + require.NoError(t, err) + + rightTyp := typed.(*tree.ComparisonExpr).Right.(tree.TypedExpr).ResolvedType() + require.Equal(t, rightTyp.Family(), types.CollatedStringFamily) + require.Equal(t, rightTyp.Locale(), "en-US-u-ks-level2") +} From 69f16c6b93347e6a035dd01803d65d4ef284c904 Mon Sep 17 00:00:00 2001 From: Xin Hao Zhang Date: Mon, 5 Dec 2022 13:32:01 -0500 Subject: [PATCH 5/9] insights: record more txn insights Closes #93076 This commit adds the following fields at the txn level when recording insights in the sql insights system: - contention: total txn contention time - start_time: txn start time - end_time: txn end time - auto_retry_reason: last reason for auto txn retry - retry_count - rows_written - rows_read In addition, the following fields have been moved from recording at the stmt level to recording at the txn level for insights: - user - application_name Release note: None --- pkg/sql/conn_executor_exec.go | 4 ++ pkg/sql/crdb_internal.go | 4 +- pkg/sql/executor_statement_metrics.go | 2 +- pkg/sql/sqlstats/insights/insights.proto | 13 +++++- pkg/sql/sqlstats/sslocal/sql_stats_test.go | 40 +++++++++---------- .../sqlstats/ssmemstorage/ss_mem_writer.go | 28 +++++++++---- pkg/sql/sqlstats/ssprovider.go | 6 ++- 7 files changed, 63 insertions(+), 34 deletions(-) diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index c04c6075bce2..cad08bde6528 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -2394,9 +2394,12 @@ func (ex *connExecutor) recordTransactionFinish( SessionID: ex.sessionID, TransactionID: ev.txnID, TransactionTimeSec: txnTime.Seconds(), + StartTime: txnStart, + EndTime: txnEnd, Committed: ev.eventType == 
txnCommit, ImplicitTxn: implicit, RetryCount: int64(ex.state.mu.autoRetryCounter), + AutoRetryReason: ex.state.mu.autoRetryReason, StatementFingerprintIDs: ex.extraTxnState.transactionStatementFingerprintIDs, ServiceLatency: txnServiceLat, RetryLatency: txnRetryLat, @@ -2409,6 +2412,7 @@ func (ex *connExecutor) recordTransactionFinish( RowsWritten: ex.extraTxnState.rowsWritten, BytesRead: ex.extraTxnState.bytesRead, Priority: ex.state.priority, + SessionData: ex.sessionData(), } if ex.server.cfg.TestingKnobs.OnRecordTxnFinish != nil { diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index f5976a079774..860d660a1dd6 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -6692,8 +6692,8 @@ func populateExecutionInsights( startTimestamp, endTimestamp, tree.MakeDBool(tree.DBool(insight.Statement.FullScan)), - tree.NewDString(insight.Statement.User), - tree.NewDString(insight.Statement.ApplicationName), + tree.NewDString(insight.Transaction.User), + tree.NewDString(insight.Transaction.ApplicationName), tree.NewDString(insight.Statement.Database), tree.NewDString(insight.Statement.PlanGist), tree.NewDInt(tree.DInt(insight.Statement.RowsRead)), diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go index 4b36fe2c6ed2..601aa0c9f681 100644 --- a/pkg/sql/executor_statement_metrics.go +++ b/pkg/sql/executor_statement_metrics.go @@ -208,9 +208,9 @@ func (ex *connExecutor) recordStatementSummary( StartTime: phaseTimes.GetSessionPhaseTime(sessionphase.PlannerStartExecStmt), EndTime: phaseTimes.GetSessionPhaseTime(sessionphase.PlannerStartExecStmt).Add(svcLatRaw), FullScan: fullScan, - SessionData: planner.SessionData(), ExecStats: queryLevelStats, Indexes: planner.instrumentation.indexesUsed, + Database: planner.SessionData().Database, } stmtFingerprintID, err := diff --git a/pkg/sql/sqlstats/insights/insights.proto b/pkg/sql/sqlstats/insights/insights.proto index c419dc3a631d..f8e0a9481e18 100644 --- a/pkg/sql/sqlstats/insights/insights.proto +++ b/pkg/sql/sqlstats/insights/insights.proto @@ -70,6 +70,15 @@ message Transaction { (gogoproto.nullable) = false]; string user_priority = 3; bool implicit_txn = 4; + google.protobuf.Duration contention = 5 [(gogoproto.stdduration) = true]; + google.protobuf.Timestamp start_time = 6 [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + google.protobuf.Timestamp end_time = 7 [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + string user = 8; + string application_name = 9; + int64 rows_read = 10; + int64 rows_written = 11; + int64 retry_count = 12; + string auto_retry_reason = 13; } message Statement { @@ -89,8 +98,8 @@ message Statement { google.protobuf.Timestamp start_time = 6 [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; google.protobuf.Timestamp end_time = 7 [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; bool full_scan = 8; - string user = 9; - string application_name = 10; + reserved 9; // previously user + reserved 10; // previously application_name string database = 11; string plan_gist = 12; int64 rows_read = 13; diff --git a/pkg/sql/sqlstats/sslocal/sql_stats_test.go b/pkg/sql/sqlstats/sslocal/sql_stats_test.go index bece50da04cb..c22905df2a28 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats_test.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats_test.go @@ -474,7 +474,15 @@ func TestExplicitTxnFingerprintAccounting(t *testing.T) { statsCollector.EndTransaction(ctx, txnFingerprintID) require.NoError(t, statsCollector. 
- RecordTransaction(ctx, txnFingerprintID, sqlstats.RecordedTxnStats{})) + RecordTransaction(ctx, txnFingerprintID, sqlstats.RecordedTxnStats{ + SessionData: &sessiondata.SessionData{ + SessionData: sessiondatapb.SessionData{ + UserProto: username.RootUserName().EncodeProto(), + Database: "defaultdb", + ApplicationName: "appname_findme", + }, + }, + })) }() for _, fingerprint := range testCase.fingerprints { stmtFingerprintID, err := statsCollector.RecordStatement( @@ -483,15 +491,7 @@ func TestExplicitTxnFingerprintAccounting(t *testing.T) { Query: fingerprint, ImplicitTxn: testCase.implicit, }, - sqlstats.RecordedStmtStats{ - SessionData: &sessiondata.SessionData{ - SessionData: sessiondatapb.SessionData{ - UserProto: username.RootUserName().EncodeProto(), - Database: "defaultdb", - ApplicationName: "appname_findme", - }, - }, - }, + sqlstats.RecordedStmtStats{}, ) require.NoError(t, err) txnFingerprintIDHash.Add(uint64(stmtFingerprintID)) @@ -592,15 +592,7 @@ func TestAssociatingStmtStatsWithTxnFingerprint(t *testing.T) { stmtFingerprintID, err := statsCollector.RecordStatement( ctx, roachpb.StatementStatisticsKey{Query: fingerprint}, - sqlstats.RecordedStmtStats{ - SessionData: &sessiondata.SessionData{ - SessionData: sessiondatapb.SessionData{ - UserProto: username.RootUserName().EncodeProto(), - Database: "defaultdb", - ApplicationName: "appname_findme", - }, - }, - }, + sqlstats.RecordedStmtStats{}, ) require.NoError(t, err) txnFingerprintIDHash.Add(uint64(stmtFingerprintID)) @@ -608,7 +600,15 @@ func TestAssociatingStmtStatsWithTxnFingerprint(t *testing.T) { transactionFingerprintID := roachpb.TransactionFingerprintID(txnFingerprintIDHash.Sum()) statsCollector.EndTransaction(ctx, transactionFingerprintID) - err := statsCollector.RecordTransaction(ctx, transactionFingerprintID, sqlstats.RecordedTxnStats{}) + err := statsCollector.RecordTransaction(ctx, transactionFingerprintID, sqlstats.RecordedTxnStats{ + SessionData: &sessiondata.SessionData{ + SessionData: sessiondatapb.SessionData{ + UserProto: username.RootUserName().EncodeProto(), + Database: "defaultdb", + ApplicationName: "appname_findme", + }, + }, + }) require.NoError(t, err) // Gather the collected stats so that we can assert on them. 
diff --git a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go index 7d27396c0a27..26034e9acdcb 100644 --- a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go +++ b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go @@ -194,9 +194,6 @@ func (s *Container) RecordStatement( StartTime: value.StartTime, EndTime: value.EndTime, FullScan: value.FullScan, - User: value.SessionData.User().Normalized(), - ApplicationName: value.SessionData.ApplicationName, - Database: value.SessionData.Database, PlanGist: value.PlanGist, Retries: int64(value.AutoRetryCount), AutoRetryReason: autoRetryReason, @@ -206,6 +203,7 @@ func (s *Container) RecordStatement( Contention: contention, ContentionEvents: contentionEvents, IndexRecommendations: value.IndexRecommendations, + Database: value.Database, }) return stats.ID, nil @@ -318,12 +316,26 @@ func (s *Container) RecordTransaction( stats.mu.data.ExecStats.MaxDiskUsage.Record(stats.mu.data.ExecStats.Count, float64(value.ExecStats.MaxDiskUsage)) } - s.insights.ObserveTransaction(value.SessionID, &insights.Transaction{ - ID: value.TransactionID, - FingerprintID: key, - UserPriority: value.Priority.String(), - ImplicitTxn: value.ImplicitTxn}) + var retryReason string + if value.AutoRetryReason != nil { + retryReason = value.AutoRetryReason.Error() + } + s.insights.ObserveTransaction(value.SessionID, &insights.Transaction{ + ID: value.TransactionID, + FingerprintID: key, + UserPriority: value.Priority.String(), + ImplicitTxn: value.ImplicitTxn, + Contention: &value.ExecStats.ContentionTime, + StartTime: value.StartTime, + EndTime: value.EndTime, + User: value.SessionData.User().Normalized(), + ApplicationName: value.SessionData.ApplicationName, + RowsRead: value.RowsRead, + RowsWritten: value.RowsWritten, + RetryCount: value.RetryCount, + AutoRetryReason: retryReason, + }) return nil } diff --git a/pkg/sql/sqlstats/ssprovider.go b/pkg/sql/sqlstats/ssprovider.go index c00182debae4..bc123ded975c 100644 --- a/pkg/sql/sqlstats/ssprovider.go +++ b/pkg/sql/sqlstats/ssprovider.go @@ -217,9 +217,9 @@ type RecordedStmtStats struct { StartTime time.Time EndTime time.Time FullScan bool - SessionData *sessiondata.SessionData ExecStats *execstats.QueryLevelStats Indexes []string + Database string } // RecordedTxnStats stores the statistics of a transaction to be recorded. @@ -227,9 +227,12 @@ type RecordedTxnStats struct { SessionID clusterunique.ID TransactionID uuid.UUID TransactionTimeSec float64 + StartTime time.Time + EndTime time.Time Committed bool ImplicitTxn bool RetryCount int64 + AutoRetryReason error StatementFingerprintIDs []roachpb.StmtFingerprintID ServiceLatency time.Duration RetryLatency time.Duration @@ -242,4 +245,5 @@ type RecordedTxnStats struct { RowsWritten int64 BytesRead int64 Priority roachpb.UserPriority + SessionData *sessiondata.SessionData } From e6fae104271304950b7b1176a3bd6dd736d17f15 Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Tue, 6 Dec 2022 14:51:09 -0600 Subject: [PATCH 6/9] bazci: insert `--config ci` if necessary All builds and tests in CI need this --config argument. 
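As a rough illustration (hypothetical invocation; the test target is
made up), a CI step that runs

    bazci -- test //pkg/util/...

should now effectively execute

    bazel test //pkg/util/... --build_event_binary_file=<bep> --bes_backend=<bes> --config ci

whereas an explicit `--config=ci` (or `--config=cinolint`) already
present in the arguments is detected, and the config is not appended a
second time.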
Epic: None
Release note: None
---
 pkg/cmd/bazci/bazci.go | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pkg/cmd/bazci/bazci.go b/pkg/cmd/bazci/bazci.go
index 2dacd24ac4b0..cbda934e261e 100644
--- a/pkg/cmd/bazci/bazci.go
+++ b/pkg/cmd/bazci/bazci.go
@@ -333,6 +333,18 @@ func bazciImpl(cmd *cobra.Command, args []string) error {
 	}
 	args = append(args, fmt.Sprintf("--build_event_binary_file=%s", bepLoc))
 	args = append(args, fmt.Sprintf("--bes_backend=grpc://127.0.0.1:%d", port))
+	// Insert `--config ci` if it's not already in the args list.
+	hasCiConfig := false
+	for idx, arg := range args {
+		if arg == "--config=ci" || arg == "--config=cinolint" ||
+			(arg == "--config" && idx < len(args)-1 && (args[idx+1] == "ci" || args[idx+1] == "cinolint")) {
+			hasCiConfig = true
+			break
+		}
+	}
+	if !hasCiConfig {
+		args = append(args, "--config", "ci")
+	}
 	fmt.Println("running bazel w/ args: ", shellescape.QuoteCommand(args))
 	bazelCmd := exec.Command("bazel", args...)
 	bazelCmd.Stdout = os.Stdout

From 9d19eca9f464ca3a8b6908ead4dca03ae87df1b1 Mon Sep 17 00:00:00 2001
From: healthy-pod
Date: Tue, 6 Dec 2022 16:26:43 -0800
Subject: [PATCH 7/9] dev: fix dev build error when cross building

This is a temporary fix for the issue. In a future change, we should
let beaver hub distinguish between normal and cross builds, and then
have an actual fix for this.

Release note: None
Epic: none
---
 dev                  |  2 +-
 pkg/cmd/dev/build.go | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/dev b/dev
index f41a41e86f8a..8a3b02cbfed8 100755
--- a/dev
+++ b/dev
@@ -8,7 +8,7 @@ fi
 set -euo pipefail
 
 # Bump this counter to force rebuilding `dev` on all machines.
-DEV_VERSION=64
+DEV_VERSION=65
 
 THIS_DIR=$(cd "$(dirname "$0")" && pwd)
 BINARY_DIR=$THIS_DIR/bin/dev-versions
diff --git a/pkg/cmd/dev/build.go b/pkg/cmd/dev/build.go
index 51ab3668440e..076117b2ff1c 100644
--- a/pkg/cmd/dev/build.go
+++ b/pkg/cmd/dev/build.go
@@ -167,13 +167,13 @@ func (d *dev) build(cmd *cobra.Command, commandLine []string) error {
 		return err
 	}
 	args = append(args, additionalBazelArgs...)
-	if buildutil.CrdbTestBuild {
-		args = append(args, "--build_event_binary_file=/tmp/path")
-	} else {
-		args = append(args, fmt.Sprintf("--build_event_binary_file=%s", filepath.Join(tmpDir, bepFileBasename)))
-	}
 
 	if cross == "" {
+		if buildutil.CrdbTestBuild {
+			args = append(args, "--build_event_binary_file=/tmp/path")
+		} else {
+			args = append(args, fmt.Sprintf("--build_event_binary_file=%s", filepath.Join(tmpDir, bepFileBasename)))
+		}
 		logCommand("bazel", args...)
 		if err := d.exec.CommandContextInheritingStdStreams(ctx, "bazel", args...); err != nil {
 			return err

From b4d87263a50624e5083544b42924cbd4b7745a47 Mon Sep 17 00:00:00 2001
From: Andrii Vorobiov
Date: Tue, 18 Oct 2022 00:36:00 +0300
Subject: [PATCH 8/9] kv: collect hot ranges per tenant on store

The main goal of this change is to provide the ability to request hot
ranges info per tenant (as part of the changes for UA). To track hot
ranges per tenant, a `ReplicaRankingMap` struct is added that works
almost identically to the existing `ReplicaRankings` struct, with the
only difference being that the hottest replicas are stored in an
underlying map structure, keeping the ranges of every tenant separate.

Also, it was decided not to extend the existing `ReplicaRankings`
functionality, because it is used for replica rebalancing, which
doesn't need to know about tenants.
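As a sketch, the intended call pattern mirrors the existing
`ReplicaRankings` flow (illustrative only; `candidate` is a stand-in
for a CandidateReplica produced during the store's capacity scan):

    rr := NewReplicaRankingsMap()
    acc := rr.NewAccumulator()
    // Offer every replica to the accumulator; it keeps only the
    // top-k replicas per tenant, ordered by QPS.
    acc.AddReplica(candidate)
    rr.Update(acc)
    // Hot ranges can then be requested for a single tenant:
    repls := rr.TopQPS(roachpb.MustMakeTenantID(2))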
This change might bring a small overhead for self-hosted clusters, as
it will accumulate the same hot ranges in both the `replRankings` and
`replRankingsByTenant` fields in Store.

Changes in upcoming PRs will rely on this change.

Release note: None
---
 pkg/kv/kvserver/replica_rankings.go      | 79 ++++++++++++++++++++++++
 pkg/kv/kvserver/replica_rankings_test.go | 70 +++++++++++++++++++++
 pkg/kv/kvserver/store.go                 | 71 +++++++++++++--------
 3 files changed, 194 insertions(+), 26 deletions(-)

diff --git a/pkg/kv/kvserver/replica_rankings.go b/pkg/kv/kvserver/replica_rankings.go
index dcf6edca5606..39986af6da0b 100644
--- a/pkg/kv/kvserver/replica_rankings.go
+++ b/pkg/kv/kvserver/replica_rankings.go
@@ -199,3 +199,82 @@ func (pq *rrPriorityQueue) Pop() interface{} {
 	pq.entries = old[0 : n-1]
 	return item
 }
+
+// ReplicaRankingMap maintains top-k orderings of the replicas per tenant in a store by QPS.
+type ReplicaRankingMap struct {
+	mu struct {
+		syncutil.Mutex
+		items RRAccumulatorByTenant
+	}
+}
+
+// NewReplicaRankingsMap returns a new ReplicaRankingMap struct.
+func NewReplicaRankingsMap() *ReplicaRankingMap {
+	return &ReplicaRankingMap{}
+}
+
+// NewAccumulator returns a new RRAccumulatorByTenant.
+func (rr *ReplicaRankingMap) NewAccumulator() *RRAccumulatorByTenant {
+	return &RRAccumulatorByTenant{}
+}
+
+// Update sets the accumulator for replica tracking to be the passed in value.
+func (rr *ReplicaRankingMap) Update(acc *RRAccumulatorByTenant) {
+	rr.mu.Lock()
+	rr.mu.items = *acc
+	rr.mu.Unlock()
+}
+
+// TopQPS returns the highest QPS CandidateReplicas that are tracked.
+func (rr *ReplicaRankingMap) TopQPS(tenantID roachpb.TenantID) []CandidateReplica {
+	rr.mu.Lock()
+	defer rr.mu.Unlock()
+	r, ok := rr.mu.items[tenantID]
+	if !ok {
+		return []CandidateReplica{}
+	}
+	if r.Len() > 0 {
+		r.entries = consumeAccumulator(&r)
+		rr.mu.items[tenantID] = r
+	}
+	return r.entries
+}
+
+// RRAccumulatorByTenant accumulates replicas per tenant to update the replicas tracked by ReplicaRankingMap.
+// It should be used in the same way as RRAccumulator (see doc string).
+type RRAccumulatorByTenant map[roachpb.TenantID]rrPriorityQueue
+
+// AddReplica adds a replica to the replica accumulator.
+func (a RRAccumulatorByTenant) AddReplica(repl CandidateReplica) {
+	// Do not consider a range hot if it is accessed at most once per second.
+ if repl.QPS() <= 1 { + return + } + + tID, ok := repl.Repl().TenantID() + if !ok { + return + } + + r, ok := a[tID] + if !ok { + q := rrPriorityQueue{ + val: func(r CandidateReplica) float64 { return r.QPS() }, + } + heap.Push(&q, repl) + a[tID] = q + return + } + + if r.Len() < numTopReplicasToTrack { + heap.Push(&r, repl) + a[tID] = r + return + } + + if repl.QPS() > r.entries[0].QPS() { + heap.Pop(&r) + heap.Push(&r, repl) + a[tID] = r + } +} diff --git a/pkg/kv/kvserver/replica_rankings_test.go b/pkg/kv/kvserver/replica_rankings_test.go index 24160988f035..47e68cb15c41 100644 --- a/pkg/kv/kvserver/replica_rankings_test.go +++ b/pkg/kv/kvserver/replica_rankings_test.go @@ -484,3 +484,73 @@ func TestReadLoadMetricAccounting(t *testing.T) { }) } } + +func TestNewReplicaRankingsMap(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + rr := NewReplicaRankingsMap() + + type testCase struct { + tenantID uint64 + qps float64 + } + + testCases := [][]testCase{ + {}, + {{1, 1}, {1, 2}, {1, 3}, {1, 4}}, + {{1, 1}, {1, 2}, {2, 0}, {3, 0}}, + {{1, 1}, {1, 2}, {1, 3}, {1, 4}, + {2, 1}, {2, 2}, {2, 3}, {2, 4}, + {3, 1}, {3, 2}, {3, 3}, {3, 4}}, + } + + for _, tc := range testCases { + acc := rr.NewAccumulator() + + // Randomize the order of the inputs each time the test is run. + rand.Shuffle(len(tc), func(i, j int) { + tc[i], tc[j] = tc[j], tc[i] + }) + + expectedReplicasPerTenant := make(map[uint64]int) + + for i, c := range tc { + cr := candidateReplica{ + Replica: &Replica{RangeID: roachpb.RangeID(i)}, + qps: c.qps, + } + cr.mu.tenantID = roachpb.MustMakeTenantID(c.tenantID) + acc.AddReplica(cr) + + if c.qps <= 1 { + continue + } + + if l, ok := expectedReplicasPerTenant[c.tenantID]; ok { + expectedReplicasPerTenant[c.tenantID] = l + 1 + } else { + expectedReplicasPerTenant[c.tenantID] = 1 + } + } + rr.Update(acc) + + for tID, count := range expectedReplicasPerTenant { + repls := rr.TopQPS(roachpb.MustMakeTenantID(tID)) + if len(repls) != count { + t.Errorf("wrong number of replicas in output; got: %v; want: %v", repls, tc) + continue + } + for i := 0; i < len(repls)-1; i++ { + if repls[i].QPS() < repls[i+1].QPS() { + t.Errorf("got %f for %d'th element; it's smaller than QPS of the next element %f", repls[i].QPS(), i, repls[i+1].QPS()) + break + } + } + replsCopy := rr.TopQPS(roachpb.MustMakeTenantID(tID)) + if !reflect.DeepEqual(repls, replsCopy) { + t.Errorf("got different replicas on second call to topQPS; first call: %v, second call: %v", repls, replsCopy) + } + } + } +} diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 1e950873401a..1ca007a84542 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -735,21 +735,22 @@ increasing over time (see Replica.setTombstoneKey). NOTE: to the best of our knowledge, we don't rely on this invariant. 
*/ type Store struct { - Ident *roachpb.StoreIdent // pointer to catch access before Start() is called - cfg StoreConfig - db *kv.DB - engine storage.Engine // The underlying key-value store - tsCache tscache.Cache // Most recent timestamps for keys / key ranges - allocator allocatorimpl.Allocator // Makes allocation decisions - replRankings *ReplicaRankings - storeRebalancer *StoreRebalancer - rangeIDAlloc *idalloc.Allocator // Range ID allocator - mvccGCQueue *mvccGCQueue // MVCC GC queue - mergeQueue *mergeQueue // Range merging queue - splitQueue *splitQueue // Range splitting queue - replicateQueue *replicateQueue // Replication queue - replicaGCQueue *replicaGCQueue // Replica GC queue - raftLogQueue *raftLogQueue // Raft log truncation queue + Ident *roachpb.StoreIdent // pointer to catch access before Start() is called + cfg StoreConfig + db *kv.DB + engine storage.Engine // The underlying key-value store + tsCache tscache.Cache // Most recent timestamps for keys / key ranges + allocator allocatorimpl.Allocator // Makes allocation decisions + replRankings *ReplicaRankings + replRankingsByTenant *ReplicaRankingMap + storeRebalancer *StoreRebalancer + rangeIDAlloc *idalloc.Allocator // Range ID allocator + mvccGCQueue *mvccGCQueue // MVCC GC queue + mergeQueue *mergeQueue // Range merging queue + splitQueue *splitQueue // Range splitting queue + replicateQueue *replicateQueue // Replication queue + replicaGCQueue *replicaGCQueue // Replica GC queue + raftLogQueue *raftLogQueue // Raft log truncation queue // Carries out truncations proposed by the raft log queue, and "replicated" // via raft, when they are safe. Created in Store.Start. raftTruncator *raftLogTruncator @@ -1239,6 +1240,8 @@ func NewStore( } s.replRankings = NewReplicaRankings() + s.replRankingsByTenant = NewReplicaRankingsMap() + s.raftRecvQueues.mon = mon.NewUnlimitedMonitor( ctx, "raft-receive-queue", @@ -3057,6 +3060,7 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa bytesPerReplica := make([]float64, 0, replicaCount) writesPerReplica := make([]float64, 0, replicaCount) rankingsAccumulator := s.replRankings.NewAccumulator() + rankingsByTenantAccumulator := s.replRankingsByTenant.NewAccumulator() // Query the current L0 sublevels and record the updated maximum to metrics. l0SublevelsMax = int64(syncutil.LoadFloat64(&s.metrics.l0SublevelsWindowedMax)) @@ -3082,10 +3086,12 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa totalWritesPerSecond += wps writesPerReplica = append(writesPerReplica, wps) } - rankingsAccumulator.AddReplica(candidateReplica{ + cr := candidateReplica{ Replica: r, qps: qps, - }) + } + rankingsAccumulator.AddReplica(cr) + rankingsByTenantAccumulator.AddReplica(cr) return true }) capacity.RangeCount = rangeCount @@ -3103,6 +3109,7 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa capacity.WritesPerReplica = roachpb.PercentilesFromData(writesPerReplica) s.recordNewPerSecondStats(totalQueriesPerSecond, totalWritesPerSecond) s.replRankings.Update(rankingsAccumulator) + s.replRankingsByTenant.Update(rankingsByTenantAccumulator) s.cachedCapacity.Lock() s.cachedCapacity.StoreCapacity = capacity @@ -3556,15 +3563,27 @@ type HotReplicaInfo struct { // out of date. 
 func (s *Store) HottestReplicas() []HotReplicaInfo {
 	topQPS := s.replRankings.TopQPS()
-	hotRepls := make([]HotReplicaInfo, len(topQPS))
-	for i := range topQPS {
-		hotRepls[i].Desc = topQPS[i].Desc()
-		hotRepls[i].QPS = topQPS[i].QPS()
-		hotRepls[i].RequestsPerSecond = topQPS[i].Repl().RequestsPerSecond()
-		hotRepls[i].WriteKeysPerSecond = topQPS[i].Repl().WritesPerSecond()
-		hotRepls[i].ReadKeysPerSecond = topQPS[i].Repl().ReadsPerSecond()
-		hotRepls[i].WriteBytesPerSecond = topQPS[i].Repl().WriteBytesPerSecond()
-		hotRepls[i].ReadBytesPerSecond = topQPS[i].Repl().ReadBytesPerSecond()
+	return mapToHotReplicasInfo(topQPS)
+}
+
+// HottestReplicasByTenant returns the hottest replicas on a store for the
+// specified tenant ID. It works identically to HottestReplicas, except that
+// the hottest replicas are tracked (and returned) per tenant.
+func (s *Store) HottestReplicasByTenant(tenantID roachpb.TenantID) []HotReplicaInfo {
+	topQPS := s.replRankingsByTenant.TopQPS(tenantID)
+	return mapToHotReplicasInfo(topQPS)
+}
+
+func mapToHotReplicasInfo(repls []CandidateReplica) []HotReplicaInfo {
+	hotRepls := make([]HotReplicaInfo, len(repls))
+	for i := range repls {
+		hotRepls[i].Desc = repls[i].Desc()
+		hotRepls[i].QPS = repls[i].QPS()
+		hotRepls[i].RequestsPerSecond = repls[i].Repl().RequestsPerSecond()
+		hotRepls[i].WriteKeysPerSecond = repls[i].Repl().WritesPerSecond()
+		hotRepls[i].ReadKeysPerSecond = repls[i].Repl().ReadsPerSecond()
+		hotRepls[i].WriteBytesPerSecond = repls[i].Repl().WriteBytesPerSecond()
+		hotRepls[i].ReadBytesPerSecond = repls[i].Repl().ReadBytesPerSecond()
 	}
 	return hotRepls
 }

From cd000eeab94e78755cf4de532a4e1c4484a63a24 Mon Sep 17 00:00:00 2001
From: Bilal Akhtar
Date: Wed, 7 Dec 2022 10:35:38 -0500
Subject: [PATCH 9/9] vendor: bump Pebble to 0d6d19018632

0d6d1901 crossversion: don't stream TestMeta output on verbose
d05a6f0e vfs: use SequentialReadsOption in vfs.[Limited]Copy
b85fc64f merging_iter: don't relative-seek past prefix in isNextEntryDeleted
32ad55f8 *: use github.com/cockroachdb/datadriven
6b644274 sstable: don't fatal if file no longer exists during readahead

Release note: None.
--- DEPS.bzl | 6 +++--- build/bazelutil/distdir_files.bzl | 2 +- go.mod | 2 +- go.sum | 4 ++-- vendor | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 73e5379d6229..e05062117da8 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1485,10 +1485,10 @@ def go_deps(): patches = [ "@com_github_cockroachdb_cockroach//build/patches:com_github_cockroachdb_pebble.patch", ], - sha256 = "4452117f35d8c00d73e8384dc2ba3c9bb69bf4507b30d0fec2a4287c7f318efa", - strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20221205175550-4a63cdb3a71e", + sha256 = "b422de55eea4f2662a4e1b32807d699f4f7feb0fab40dc0e99455473561c689c", + strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20221206222826-0d6d19018632", urls = [ - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20221205175550-4a63cdb3a71e.zip", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20221206222826-0d6d19018632.zip", ], ) go_repository( diff --git a/build/bazelutil/distdir_files.bzl b/build/bazelutil/distdir_files.bzl index d178849aec68..517ce8409c2a 100644 --- a/build/bazelutil/distdir_files.bzl +++ b/build/bazelutil/distdir_files.bzl @@ -193,7 +193,7 @@ DISTDIR_FILES = { "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/google-api-go-client/com_github_cockroachdb_google_api_go_client-v0.80.1-0.20221117193156-6a9f7150cb93.zip": "b3378c579f4f4340403038305907d672c86f615f8233118a8873ebe4229c4f39", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/gostdlib/com_github_cockroachdb_gostdlib-v1.19.0.zip": "c4d516bcfe8c07b6fc09b8a9a07a95065b36c2855627cb3514e40c98f872b69e", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/logtags/com_github_cockroachdb_logtags-v0.0.0-20211118104740-dabe8e521a4f.zip": "1972c3f171f118add3fd9e64bcea6cbb9959a3b7fa0ada308e8a7310813fea74", - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20221205175550-4a63cdb3a71e.zip": "4452117f35d8c00d73e8384dc2ba3c9bb69bf4507b30d0fec2a4287c7f318efa", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20221206222826-0d6d19018632.zip": "b422de55eea4f2662a4e1b32807d699f4f7feb0fab40dc0e99455473561c689c", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/redact/com_github_cockroachdb_redact-v1.1.3.zip": "7778b1e4485e4f17f35e5e592d87eb99c29e173ac9507801d000ad76dd0c261e", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/returncheck/com_github_cockroachdb_returncheck-v0.0.0-20200612231554-92cdbca611dd.zip": "ce92ba4352deec995b1f2eecf16eba7f5d51f5aa245a1c362dfe24c83d31f82b", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/sentry-go/com_github_cockroachdb_sentry_go-v0.6.1-cockroachdb.2.zip": "fbb2207d02aecfdd411b1357efe1192dbb827959e36b7cab7491731ac55935c9", diff --git a/go.mod b/go.mod index cf79bceb6f3a..7e9f296375a1 100644 --- a/go.mod +++ b/go.mod @@ -112,7 +112,7 @@ require ( github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55 github.com/cockroachdb/gostdlib v1.19.0 github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f - github.com/cockroachdb/pebble v0.0.0-20221205175550-4a63cdb3a71e + github.com/cockroachdb/pebble v0.0.0-20221206222826-0d6d19018632 
github.com/cockroachdb/redact v1.1.3 github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd github.com/cockroachdb/stress v0.0.0-20220803192808-1806698b1b7b diff --git a/go.sum b/go.sum index 8dcd2bc55640..3cf4069885f4 100644 --- a/go.sum +++ b/go.sum @@ -475,8 +475,8 @@ github.com/cockroachdb/gostdlib v1.19.0/go.mod h1:+dqqpARXbE/gRDEhCak6dm0l14AaTy github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0njg5jJ1DdKCFPdMBrp/mdZfCpa5h+WM74= github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= -github.com/cockroachdb/pebble v0.0.0-20221205175550-4a63cdb3a71e h1:ASgNX7mpOQnyi6P3ZMXgfB0V9jUAzuLVDxL5AV7oSP0= -github.com/cockroachdb/pebble v0.0.0-20221205175550-4a63cdb3a71e/go.mod h1:qf9bLis2yy1XyNYD01wvIHPabuC1STzQsvGibYVsom4= +github.com/cockroachdb/pebble v0.0.0-20221206222826-0d6d19018632 h1:XD+3KAuihY5B+bvwmvfq/RjFTj8AA/wj5Oq+IB/WSxg= +github.com/cockroachdb/pebble v0.0.0-20221206222826-0d6d19018632/go.mod h1:8vvNzfaCFGp5Yvnqu0+a1LCL5i+NCID7YsNdhe0xhM8= github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ= github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd h1:KFOt5I9nEKZgCnOSmy8r4Oykh8BYQO8bFOTgHDS8YZA= diff --git a/vendor b/vendor index b8fee08b85b1..9b2fc6fe7134 160000 --- a/vendor +++ b/vendor @@ -1 +1 @@ -Subproject commit b8fee08b85b1a42f4df7e51980fb12f5ca836c1c +Subproject commit 9b2fc6fe71347c79943551c2a7069d878196cc19