From 4b87640f5f7f92e21d2fe4e82cdd3e9d5afef837 Mon Sep 17 00:00:00 2001
From: gtr
Date: Tue, 22 Aug 2023 13:52:57 -0400
Subject: [PATCH 1/9] sql stats: skip tests hitting combinedstmts and
 statements endpoints under stress

This commit skips tests that hit the `combinedStmts` or `statements`
endpoints, which will sometimes time out under stress as a result of
recent backend changes. The test investigation is tracked by #109184.

Release note: None
---
 pkg/server/application_api/sql_stats_test.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/server/application_api/sql_stats_test.go b/pkg/server/application_api/sql_stats_test.go
index 2d396ecadf4c..cbf3a895cd6e 100644
--- a/pkg/server/application_api/sql_stats_test.go
+++ b/pkg/server/application_api/sql_stats_test.go
@@ -48,6 +48,9 @@ func TestStatusAPICombinedTransactions(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
+	// Skip under stress until we extend the timeout for the http client.
+	skip.UnderStressWithIssue(t, 109184)
+
 	var params base.TestServerArgs
 	params.Knobs.SpanConfig = &spanconfig.TestingKnobs{ManagerDisableJobCreation: true} // TODO(irfansharif): #74919.
 	testCluster := serverutils.StartCluster(t, 3, base.TestClusterArgs{
@@ -380,6 +383,9 @@ func TestStatusAPIStatements(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
+	// Skip under stress until we extend the timeout for the http client.
+	skip.UnderStressWithIssue(t, 109184)
+
 	// Aug 30 2021 19:50:00 GMT+0000
 	aggregatedTs := int64(1630353000)
 	statsKnobs := sqlstats.CreateTestingKnobs()
@@ -646,6 +652,9 @@ func TestStatusAPICombinedStatements(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
+	// Skip under stress until we extend the timeout for the http client.
+	skip.UnderStressWithIssue(t, 109184)
+
 	// Aug 30 2021 19:50:00 GMT+0000
 	aggregatedTs := int64(1630353000)
 	statsKnobs := sqlstats.CreateTestingKnobs()

From 1253c6811f269a8b54076a4d7b9bf6c3d2946082 Mon Sep 17 00:00:00 2001
From: rharding6373
Date: Tue, 22 Aug 2023 14:40:59 -0700
Subject: [PATCH 2/9] sql: fix expected batch count for edge case in copy test

In TestLargeDynamicRows we test that 4 rows of data can fit in a batch
size of at least 4 rows given default memory sizes. However, when we set
the batch row size to the minimum value of 4, the test hook that counts
batches counts an extra empty batch. This PR adjusts the minimum row
size to 5 for the purposes of this test.

Epic: None
Fixes: #109134

Release note: None
---
 pkg/sql/copy/copy_test.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pkg/sql/copy/copy_test.go b/pkg/sql/copy/copy_test.go
index fcffd3dc0464..9c1564b84806 100644
--- a/pkg/sql/copy/copy_test.go
+++ b/pkg/sql/copy/copy_test.go
@@ -601,9 +601,10 @@ func TestLargeDynamicRows(t *testing.T) {
 	err = conn.Exec(ctx, "RESET CLUSTER SETTING kv.raft.command.max_size")
 	require.NoError(t, err)
 
-	// This won't work if the batch size gets set to less than 4.
-	if sql.CopyBatchRowSize < 4 {
-		sql.SetCopyFromBatchSize(4)
+	// This won't work if the batch size gets set to less than 5. When the batch
+	// size is 4, the test hook will count an extra empty batch.
+	if sql.CopyBatchRowSize < 5 {
+		sql.SetCopyFromBatchSize(5)
 	}
 
 	_, err = conn.GetDriverConn().CopyFrom(ctx, strings.NewReader(sb.String()), "COPY t FROM STDIN")

From 306fced7773978ddfc8aa75b827fcf5e834774c6 Mon Sep 17 00:00:00 2001
From: Rail Aliiev
Date: Wed, 23 Aug 2023 06:58:42 -0400
Subject: [PATCH 3/9] build: update bazel builder build docs

Previously, the documentation described a manual build of the
`bazelbuilder` docker image. The current approach is to use CI to build
the image.

This PR updates the documentation to reflect the current process,
including the FIPS image steps.

Epic: none
Release note: None
---
 build/README.md | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/build/README.md b/build/README.md
index e713aaf77782..372363a55950 100644
--- a/build/README.md
+++ b/build/README.md
@@ -137,20 +137,11 @@ Please follow the instructions above on updating the golang version, omitting th
 The `bazelbuilder` image is used exclusively for performing builds using Bazel. Only add dependencies to the image that are necessary for performing Bazel builds. (Since the Bazel build downloads most dependencies as needed, updates to the Bazel builder image should be very infrequent.) The `bazelbuilder` image is published both for `amd64` and `arm64` platforms. You can go through the process of publishing a new Bazel build
-- (One-time setup) Depending on how your Docker instance is configured, you may have to run `docker run --privileged --rm tonistiigi/binfmt --install all`. This will install `qemu` emulators on your system for platforms besides your native one.
 - Edit `build/bazelbuilder/Dockerfile` as desired.
-- Build the image for both platforms and publish the cross-platform manifest. Note that the non-native build for your image will be very slow since it will have to emulate.
-```
-    TAG=$(date +%Y%m%d-%H%M%S)
-    docker build --platform linux/amd64 -t cockroachdb/bazel:amd64-$TAG build/bazelbuilder
-    docker push cockroachdb/bazel:amd64-$TAG
-    docker build --platform linux/arm64 -t cockroachdb/bazel:arm64-$TAG build/bazelbuilder
-    docker push cockroachdb/bazel:arm64-$TAG
-    docker manifest rm cockroachdb/bazel:$TAG
-    docker manifest create cockroachdb/bazel:$TAG cockroachdb/bazel:amd64-$TAG cockroachdb/bazel:arm64-$TAG
-    docker manifest push cockroachdb/bazel:$TAG
-```
-- Then, update `build/.bazelbuilderversion` with the new tag and commit all your changes.
+- Build the image by triggering the `Build and Push Bazel Builder Image` build in TeamCity. The generated image will be published to https://hub.docker.com/r/cockroachdb/bazel.
+- Update `build/.bazelbuilderversion` with the new tag and commit all your changes.
+- Build the FIPS image by triggering the `Build and Push FIPS Bazel Builder Image` build in TeamCity. The generated image will be published to https://hub.docker.com/r/cockroachdb/bazel-fips.
+- Update `build/.bazelbuilderversion-fips` with the new tag and commit all your changes.
 - Ensure the "Bazel CI" job passes on your PR before merging.
 
 # Dependencies

From e6e8b640091868b48df9c9cb185de37f2dca73ed Mon Sep 17 00:00:00 2001
From: Liam Gillies
Date: Tue, 22 Aug 2023 16:53:17 -0400
Subject: [PATCH 4/9] dev: error when trying to `dev test` a bazel tested
 target

Running `dev test` on bazel-tested integration tests will always fail,
so this PR adds an error when running the command on those targets.
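
For illustration, here is a sketch of the new behavior, based on the check
added in `pkg/cmd/dev/test.go` below (the exact output formatting may
differ):

    $ dev test pkg/acceptance
    ERROR: pkg/acceptance:acceptance_test will fail since it is an
    integration test. To run this test, run `dev acceptance`
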
Fixes: #107813
Release note: None
---
 pkg/cmd/dev/test.go | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/pkg/cmd/dev/test.go b/pkg/cmd/dev/test.go
index f11e62629d53..65bb1f3b1702 100644
--- a/pkg/cmd/dev/test.go
+++ b/pkg/cmd/dev/test.go
@@ -42,6 +42,15 @@ const (
 	showDiffFlag = "show-diff"
 )
 
+// List of bazel integration tests that will fail when running `dev test pkg/...`.
+var integrationTests = map[string]struct{ testName, commandToRun string }{
+	"pkg/acceptance":              {"acceptance_test", "dev acceptance"},
+	"pkg/compose":                 {"compose_test", "dev compose"},
+	"pkg/compose/compare/compare": {"compare_test", "dev compose"},
+	"pkg/testutils/docker":        {"docker_test", ""},
+	"pkg/testutils/lint":          {"lint_test", "dev lint"},
+}
+
 func makeTestCmd(runE func(cmd *cobra.Command, args []string) error) *cobra.Command {
 	// testCmd runs the specified cockroachdb tests.
 	testCmd := &cobra.Command{
@@ -257,6 +266,20 @@ func (d *dev) test(cmd *cobra.Command, commandLine []string) error {
 		testTargets = append(testTargets, target)
 	}
 
+	for _, target := range testTargets {
+		testTarget := strings.Split(target, ":")
+		integrationTest, ok := integrationTests[testTarget[0]]
+		if ok {
+			// If the test targets all tests in the package or the individual test, return an error.
+			if testTarget[1] == "all" || testTarget[1] == integrationTest.testName {
+				if integrationTest.commandToRun == "" {
+					return fmt.Errorf("%s:%s will fail since it is an integration test", testTarget[0], integrationTest.testName)
+				}
+				return fmt.Errorf("%s:%s will fail since it is an integration test. To run this test, run `%s`", testTarget[0], integrationTest.testName, integrationTest.commandToRun)
+			}
+		}
+	}
+
 	args = append(args, testTargets...)
 	if ignoreCache {
 		args = append(args, "--nocache_test_results")

From d6e7b273727972fae773df151aa11d8197e99efe Mon Sep 17 00:00:00 2001
From: Jayant Shrivastava
Date: Wed, 23 Aug 2023 10:37:43 -0400
Subject: [PATCH 5/9] changefeedccl: move node drain handling logic out of
 kvfeed

Previously, the kvfeed was responsible for monitoring for node drains
using a goroutine. This change moves this logic into the change
aggregator and removes the goroutine. Overall, this change makes the
code more organized and performant.

This change was inspired by work being done for #109167. The work in
that PR requires being able to restart the kvfeed. Having drain logic
intermingled with the kvfeed makes restarts much more complex, hard to
review, and prone to bugs.

Informs: https://github.com/cockroachdb/cockroach/issues/96953

Release note: None
Epic: None
---
 .../changefeedccl/changefeed_processors.go | 81 +++++++++++--------
 pkg/ccl/changefeedccl/kvfeed/kv_feed.go    |  7 --
 2 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/pkg/ccl/changefeedccl/changefeed_processors.go b/pkg/ccl/changefeedccl/changefeed_processors.go
index 102a7f855a94..6852cf9f8e52 100644
--- a/pkg/ccl/changefeedccl/changefeed_processors.go
+++ b/pkg/ccl/changefeedccl/changefeed_processors.go
@@ -51,7 +51,8 @@ type changeAggregator struct {
 	spec   execinfrapb.ChangeAggregatorSpec
 	memAcc mon.BoundAccount
 
-	// cancel shuts down the processor, both the `Next()` flow and the kvfeed.
+	// cancel cancels the context passed to all resources created while starting
+	// this aggregator.
 	cancel func()
 	// errCh contains the return values of the kvfeed.
 	errCh chan error
@@ -59,9 +60,12 @@
 	kvFeedDoneCh chan struct{}
 	kvFeedMemMon *mon.BytesMonitor
 
-	// drainWatchCh is signaled if the registry on this node is being drained.
+	// drainWatchCh is signaled if the job registry on this node is being
+	// drained, which is a proxy for the node being drained. If a drain occurs,
+	// it will be blocked until we allow it to proceed by calling drainDone().
+	// This gives the aggregator time to checkpoint before shutting down.
 	drainWatchCh <-chan struct{}
-	drainDone    func() // Cleanup function for drain watch.
+	drainDone    func()
 
 	// sink is the Sink to write rows to. Resolved timestamps are never written
 	// by changeAggregator.
@@ -287,7 +291,6 @@ func (ca *changeAggregator) Start(ctx context.Context) {
 		ca.spec.User(), ca.spec.JobID, recorder)
 	if err != nil {
 		err = changefeedbase.MarkRetryableError(err)
-		// Early abort in the case that there is an error creating the sink.
 		ca.MoveToDraining(err)
 		ca.cancel()
 		return
@@ -312,7 +315,6 @@
 	ca.eventProducer, err = ca.startKVFeed(ctx, spans, kvFeedHighWater, needsInitialScan, feed)
 	if err != nil {
-		// Early abort in the case that there is an error creating the sink.
 		ca.MoveToDraining(err)
 		ca.cancel()
 		return
@@ -321,9 +323,7 @@
 	ca.eventConsumer, ca.sink, err = newEventConsumer(
 		ctx, ca.flowCtx.Cfg, ca.spec, feed, ca.frontier.SpanFrontier(), kvFeedHighWater,
 		ca.sink, ca.metrics, ca.sliMetrics, ca.knobs)
-
 	if err != nil {
-		// Early abort in the case that there is an error setting up the consumption.
 		ca.MoveToDraining(err)
 		ca.cancel()
 		return
@@ -334,6 +334,26 @@
 
 	// Generate expensive checkpoint only after we ran for a while.
 	ca.lastSpanFlush = timeutil.Now()
+
+	if ca.knobs.OnDrain != nil {
+		ca.drainWatchCh = ca.knobs.OnDrain()
+	} else {
+		ca.drainWatchCh, ca.drainDone = ca.flowCtx.Cfg.JobRegistry.OnDrain()
+	}
+}
+
+// checkForNodeDrain returns an error if the node is draining.
+func (ca *changeAggregator) checkForNodeDrain() error {
+	if ca.drainWatchCh == nil {
+		return errors.AssertionFailedf("cannot check for node drain if" +
+			" watch channel is nil")
+	}
+	select {
+	case <-ca.drainWatchCh:
+		return changefeedbase.ErrNodeDraining
+	default:
+		return nil
+	}
 }
 
 func (ca *changeAggregator) startKVFeed(
@@ -355,21 +375,6 @@
 		return nil, err
 	}
 
-	ca.drainWatchCh, ca.drainDone = ca.flowCtx.Cfg.JobRegistry.OnDrain()
-	// Arrange for kvFeed to terminate if the job registry is being drained.
-	kvfeedCfg.FeedWatcher = func(ctx context.Context) error {
-		if ca.knobs.OnDrain != nil {
-			ca.drainWatchCh = ca.knobs.OnDrain()
-		}
-
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-ca.drainWatchCh:
-			return changefeedbase.ErrNodeDraining
-		}
-	}
-
 	// Give errCh enough buffer both possible errors from supporting goroutines,
 	// but only the first one is ever used.
 	ca.errCh = make(chan error, 2)
@@ -575,21 +580,27 @@ func (ca *changeAggregator) Next() (rowenc.EncDatumRow, *execinfrapb.ProducerMet
 			}
 		}
 
+		// As the last gasp before shutdown, transmit up-to-date frontier
+		// information to the coordinator. We expect to get this signal via the
+		// polling below before the drain actually occurs and starts tearing
+		// things down.
+		if err := ca.checkForNodeDrain(); err != nil {
+			nodeID, _ := ca.FlowCtx.Cfg.NodeID.OptionalNodeID()
+			meta := &execinfrapb.ChangefeedMeta{
+				DrainInfo:  &execinfrapb.ChangefeedMeta_DrainInfo{NodeID: nodeID},
+				Checkpoint: getFrontierSpans(),
+			}
+
+			ca.AppendTrailingMeta(execinfrapb.ProducerMetadata{Changefeed: meta})
+			ca.shutdownCheckpointEmitted = true
+			ca.cancel()
+			ca.MoveToDraining(err)
+			break
+		}
+
 		if err := ca.tick(); err != nil {
 			var e kvevent.ErrBufferClosed
-			if errors.Is(err, changefeedbase.ErrNodeDraining) {
-				err = changefeedbase.ErrNodeDraining
-				// As the last gasp before shutdown, transmit an up-to-date frontier
-				// information to the coordinator.
-				nodeID, _ := ca.FlowCtx.Cfg.NodeID.OptionalNodeID()
-				meta := &execinfrapb.ChangefeedMeta{
-					DrainInfo:  &execinfrapb.ChangefeedMeta_DrainInfo{NodeID: nodeID},
-					Checkpoint: getFrontierSpans(),
-				}
-
-				ca.AppendTrailingMeta(execinfrapb.ProducerMetadata{Changefeed: meta})
-				ca.shutdownCheckpointEmitted = true
-			} else if errors.As(err, &e) {
+			if errors.As(err, &e) {
 				// ErrBufferClosed is a signal that our kvfeed has exited expectedly.
 				err = e.Unwrap()
 				if errors.Is(err, kvevent.ErrNormalRestartReason) {
diff --git a/pkg/ccl/changefeedccl/kvfeed/kv_feed.go b/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
index 18f99e47567b..5147e9d8c92b 100644
--- a/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
+++ b/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
@@ -53,10 +53,6 @@ type Config struct {
 	SchemaChangePolicy changefeedbase.SchemaChangePolicy
 	SchemaFeed         schemafeed.SchemaFeed
 
-	// FeedWatcher function is invoked along with the kv/schema feed.
-	// It may return an error which will cause kv feed to exit.
-	FeedWatcher func(ctx context.Context) error
-
 	// If true, the feed will begin with a dump of data at exactly the
 	// InitialHighWater. This is a peculiar behavior. In general the
 	// InitialHighWater is a point in time at which all data is known to have
@@ -115,9 +111,6 @@ func Run(ctx context.Context, cfg Config) error {
 	g := ctxgroup.WithContext(ctx)
 	g.GoCtx(cfg.SchemaFeed.Run)
 	g.GoCtx(f.run)
-	if cfg.FeedWatcher != nil {
-		g.GoCtx(cfg.FeedWatcher)
-	}
 	err := g.Wait()
 
 	// NB: The higher layers of the changefeed should detect the boundary and the

From 94539396ab6d8ccb4ddba3d0625663b5efbb98cf Mon Sep 17 00:00:00 2001
From: Chris Seto
Date: Wed, 23 Aug 2023 17:49:05 +0000
Subject: [PATCH 6/9] build: explicitly set SKIP_LABEL_TEST_FAILURE in
 compose.sh

Previously, `SKIP_LABEL_TEST_FAILURE` was being set via a teamcity
configuration. This was quite opaque, as the majority of CI
configuration for Cockroach is stored as shell scripts within its repo.
This commit follows that pattern by explicitly setting
`SKIP_LABEL_TEST_FAILURE` in the script that runs `TestComposeCompare`.

Epic: None
Release note: None
---
 build/teamcity/cockroach/nightlies/compose.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/build/teamcity/cockroach/nightlies/compose.sh b/build/teamcity/cockroach/nightlies/compose.sh
index e6a775f79362..8f3bfbdbefdc 100755
--- a/build/teamcity/cockroach/nightlies/compose.sh
+++ b/build/teamcity/cockroach/nightlies/compose.sh
@@ -6,6 +6,12 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
 source "$dir/teamcity-support.sh"
 source "$dir/teamcity-bazel-support.sh"
 
+# The test failures generated by TestComposeCompare are not necessarily
+# failures per se. They're cases of behavioral divergences from Postgres.
While +# our compatibility guarantees are not 100%, it's better to treat failures as +# information to occasionally review. +export SKIP_LABEL_TEST_FAILURE=1 + tc_start_block "Run compose tests" bazel build //pkg/cmd/bazci --config=ci From c506290d9e125c8a64f14367a8e6186337b0b02c Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 23 Aug 2023 12:25:59 -0400 Subject: [PATCH 7/9] kv: wait on latches on each key in reverse acquisition order This commit allocates latch IDs from the top of the uint64 space and in reverse order. This is done to order latches in the tree on a same key in reverse order of acquisition. Doing so ensures that when we iterate over the tree and see a key with many conflicting latches, we visit the latches on that key in the reverse order that they will be released. In doing so, we minimize the number of open channels that we wait on (calls to `waitForSignal`) and minimize the number of goroutine scheduling points. This is important to avoid spikes in runnable goroutine after each request completes, which can negatively affect node health. Epic: None Release note (performance improvement): The impact of high concurrency blind writes to the same key on goroutine scheduling latency was reduced. --- pkg/kv/kvserver/spanlatch/manager.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/kv/kvserver/spanlatch/manager.go b/pkg/kv/kvserver/spanlatch/manager.go index 2b8b44b90f0b..32fe84e42081 100644 --- a/pkg/kv/kvserver/spanlatch/manager.go +++ b/pkg/kv/kvserver/spanlatch/manager.go @@ -436,7 +436,16 @@ func (m *Manager) insertLocked(lg *Guard) { } func (m *Manager) nextIDLocked() uint64 { - m.idAlloc++ + // We allocate IDs from the top of the uint64 space and in reverse order. + // This is done to order latches in the tree on a same key in reverse order + // of acquisition. Doing so ensures that when we iterate over the tree and + // see a key with many conflicting latches, we visit the latches on that key + // in the reverse order that they will be released. In doing so, we minimize + // the number of open channels that we wait on (calls to waitForSignal) and + // minimize the number of goroutine scheduling points. This is important to + // avoid spikes in runnable goroutine after each request completes, which + // can negatively affect node health. + m.idAlloc-- return m.idAlloc } From 387105df90f02b9635cbf2cfc7abb7471b7d0b71 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Wed, 23 Aug 2023 10:58:59 -0700 Subject: [PATCH 8/9] github: code coverage workflows This change adds two GitHub Action workflows which run on each PR. One generates unit test code coverage data, and one publishes that data to a GCS bucket from where Reviewable can access it. We generate coverage data using `bazel coverage`, but we restrict it to only test the packages that have been modified by the PR. 
Two workflows are required for security (the first workflow runs
potentially malicious code from a fork); for more details, see
https://securitylab.github.com/research/github-actions-preventing-pwn-requests/

Epic: none
Release note: None
---
 .github/workflows/code-cover-gen.yml      | 100 ++++++++++++++++++++++
 .github/workflows/code-cover-publish.yaml |  58 +++++++++++++
 build/ghactions/changed-go-pkgs.sh        |  14 +++
 build/ghactions/pr-codecov-run-tests.sh   |  49 +++++++++++
 4 files changed, 221 insertions(+)
 create mode 100644 .github/workflows/code-cover-gen.yml
 create mode 100644 .github/workflows/code-cover-publish.yaml
 create mode 100755 build/ghactions/changed-go-pkgs.sh
 create mode 100755 build/ghactions/pr-codecov-run-tests.sh

diff --git a/.github/workflows/code-cover-gen.yml b/.github/workflows/code-cover-gen.yml
new file mode 100644
index 000000000000..d5c1813343f4
--- /dev/null
+++ b/.github/workflows/code-cover-gen.yml
@@ -0,0 +1,100 @@
+name: PR code coverage (generate)
+on:
+  pull_request:
+    types: [ opened, reopened, synchronize ]
+    branches: [ master ]
+
+jobs:
+  code-cover-gen:
+    runs-on: ubuntu-latest
+    env:
+      PR: ${{ github.event.pull_request.number }}
+      HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+      GH_TOKEN: ${{ github.token }}
+      FETCH_DEPTH: 15
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          # By default, checkout merges the PR into the current master.
+          # Instead, we want to check out the PR as is.
+          ref: ${{ github.event.pull_request.head.sha }}
+          # Fetching the entire history is much slower; we only fetch the last
+          # 15 commits. As such, we don't support PRs with 15 commits or more
+          # (we cannot get to the "base" commit).
+          fetch-depth: ${{ env.FETCH_DEPTH }}
+
+      - name: Set up Bazel cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/bazel
+          key: ${{ runner.os }}-bazel-${{ hashFiles('.bazelversion', '.bazelrc', 'WORKSPACE', 'WORKSPACE.bazel', 'MODULE.bazel') }}
+          restore-keys: |
+            ${{ runner.os }}-bazel-
+
+      - name: Get list of changed packages
+        shell: bash
+        run: |
+          set -euxo pipefail
+
+          MAX_CHANGED_PKGS=20
+          FETCH_DEPTH=${{ env.FETCH_DEPTH }}
+          mkdir -p artifacts
+
+          skip() {
+            echo "Skipping code coverage on PR #$PR: $1"
+            # Generate the json files with an error (which will show up in Reviewable).
+            jq -n --arg err "$1" '{error: $err}' > artifacts/cover-${PR}-${HEAD_SHA}.json
+            if [ -n "${BASE_SHA:-}" ]; then
+              jq -n --arg err "$1" '{error: $err}' > artifacts/cover-${PR}-${BASE_SHA}.json
+            fi
+            echo "SKIP=true" >> "${GITHUB_ENV}"
+            exit 0
+          }
+
+          # To get the base commit, we get the number of commits in the PR.
+          # Note that github.event.pull_request.base.sha is not what we want,
+          # that is the tip of master and not necessarily the PR fork point.
+          NUM_COMMITS=$(gh pr view $PR --json commits --jq '.commits | length')
+
+          # The number of commits must be below the checkout fetch-depth.
+          if [ ${NUM_COMMITS} -ge ${FETCH_DEPTH} ]; then
+            skip "too many commits (${NUM_COMMITS})"
+          fi
+          BASE_SHA=$(git rev-parse HEAD~${NUM_COMMITS})
+          CHANGED_PKGS=$(build/ghactions/changed-go-pkgs.sh ${BASE_SHA} ${HEAD_SHA})
+          NUM_CHANGED_PKGS=$(echo "${CHANGED_PKGS}" | wc -w)
+          if [ ${NUM_CHANGED_PKGS} -gt ${MAX_CHANGED_PKGS} ]; then
+            skip "too many changed packages (${NUM_CHANGED_PKGS})"
+          fi
+          echo "BASE_SHA=${BASE_SHA}" >> "${GITHUB_ENV}"
+          echo "CHANGED_PKGS=${CHANGED_PKGS}" >> "${GITHUB_ENV}"
+
+      - name: Run "after" test coverage
+        if: env.SKIP != 'true'
+        shell: bash
+        run: |
+          set -euxo pipefail
+          CHANGED_PKGS='${{ env.CHANGED_PKGS }}'
+          # Make a copy of the script so that the "before" run below uses the
+          # same version.
+          cp build/ghactions/pr-codecov-run-tests.sh ${RUNNER_TEMP}/
+          ${RUNNER_TEMP}/pr-codecov-run-tests.sh artifacts/cover-${PR}-${HEAD_SHA}.json "${CHANGED_PKGS}"
+
+      - name: Run "before" test coverage
+        if: env.SKIP != 'true'
+        shell: bash
+        run: |
+          set -euxo pipefail
+          BASE_SHA='${{ env.BASE_SHA }}'
+          CHANGED_PKGS='${{ env.CHANGED_PKGS }}'
+          git checkout -f ${BASE_SHA}
+          ${RUNNER_TEMP}/pr-codecov-run-tests.sh artifacts/cover-${PR}-${BASE_SHA}.json "${CHANGED_PKGS}"
+
+      - name: Upload artifacts
+        # Note: we want to upload artifacts even if we skipped the steps above.
+        # See the skip function in the "Get list of changed packages" step.
+        uses: actions/upload-artifact@v2
+        with:
+          name: cover
+          path: artifacts/cover-*.json
diff --git a/.github/workflows/code-cover-publish.yaml b/.github/workflows/code-cover-publish.yaml
new file mode 100644
index 000000000000..8118c436f7ca
--- /dev/null
+++ b/.github/workflows/code-cover-publish.yaml
@@ -0,0 +1,58 @@
+name: PR code coverage (publish)
+
+on:
+  workflow_run:
+    workflows: [ "PR code coverage (generate)" ]
+    types: [ "completed" ]
+
+
+jobs:
+  # This job downloads the artifacts generated by the code-cover-gen job and
+  # uploads them to a GCS bucket, from where Reviewable can access them.
+  #
+  # Note that this workflow is not required for a PR to merge; a failure simply
+  # means that there won't be coverage data visible in Reviewable.
+  code-cover-publish:
+    runs-on: ubuntu-latest
+    if: >
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+    steps:
+      - name: 'Download artifact'
+        uses: actions/github-script@v3.1.0
+        with:
+          script: |
+            var artifacts = await github.actions.listWorkflowRunArtifacts({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              run_id: ${{github.event.workflow_run.id }},
+            });
+            var matchArtifact = artifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == "cover"
+            })[0];
+            var download = await github.actions.downloadArtifact({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              artifact_id: matchArtifact.id,
+              archive_format: 'zip',
+            });
+            var fs = require('fs');
+            fs.writeFileSync('${{github.workspace}}/cover.zip', Buffer.from(download.data));
+
+      - run: |
+          mkdir -p cover
+          unzip cover.zip -d cover
+
+      - name: 'Authenticate to Google Cloud'
+        uses: 'google-github-actions/auth@v1'
+        with:
+          credentials_json: '${{ secrets.CODECOVER_SERVICE_ACCOUNT_KEY }}'
+
+      - name: 'Upload to GCS'
+        uses: 'google-github-actions/upload-cloud-storage@v1'
+        with:
+          path: 'cover'
+          glob: '**/cover-*.json'
+          parent: false
+          destination: 'crl-codecover-public/pr-cockroach/'
+          process_gcloudignore: false
diff --git a/build/ghactions/changed-go-pkgs.sh b/build/ghactions/changed-go-pkgs.sh
new file mode 100755
index 000000000000..49d0dc76527b
--- /dev/null
+++ b/build/ghactions/changed-go-pkgs.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+BASE_SHA="$1"
+HEAD_SHA="$2"
+
+if [ -z "$HEAD_SHA" ]; then
+  echo "Usage: $0 <base_sha> <head_sha>"
+  exit 1
+fi
+
+git diff --name-only "${BASE_SHA}..${HEAD_SHA}" -- "pkg/**/*.go" ":!*/testdata/*" ":!pkg/acceptance/compose/gss/psql/**" \
+  | xargs -rn1 dirname \
+  | sort -u \
+  | { while read path; do if ls "$path"/*.go &>/dev/null; then echo -n "$path "; fi; done; }
diff --git a/build/ghactions/pr-codecov-run-tests.sh b/build/ghactions/pr-codecov-run-tests.sh
new file mode 100755
index 000000000000..264ee01dfd2c
--- /dev/null
+++ b/build/ghactions/pr-codecov-run-tests.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+set -xeuo pipefail
+
+output_json_file="$1"
+packages="$2"
+
+if [ -z "${packages}" ]; then
+  echo "No packages; skipping"
+  touch "${output_json_file}"
+  exit 0
+fi
+
+
+# Find the targets. We need to convert from, e.g.
+#   pkg/util/log/logpb pkg/util/quotapool
+# to
+#   //pkg/util/log/logpb:* + //pkg/util/quotapool:*
+
+paths=""
+sep=""
+for p in ${packages}; do
+  paths="${paths}${sep}//$p:*"
+  sep=" + "
+done
+
+targets=$(bazel query "kind(\".*_test\", ${paths})")
+
+if [[ -z "${targets}" ]]; then
+  echo "No test targets found"
+  exit 0
+fi
+
+echo "Running tests"
+
+# TODO(radu): do we need --strip=never?
+bazel coverage \
+  --@io_bazel_rules_go//go/config:cover_format=lcov --combined_report=lcov \
+  --instrumentation_filter="//pkg/..." \
+  ${targets}
+
+lcov_file="$(bazel info output_path)/_coverage/_coverage_report.dat"
+if [ ! -f "${lcov_file}" ]; then
+  echo "Coverage file ${lcov_file} does not exist"
+  exit 1
+fi
+
+echo "Converting coverage file"
+bazel run @go_sdk//:bin/go -- run github.com/cockroachdb/code-cov-utils/lcov2json@v1.0.0 "${lcov_file}" "${output_json_file}"

From 4240ac65015038c983efc50147b4e30720bfcffe Mon Sep 17 00:00:00 2001
From: Radu Berinde
Date: Wed, 23 Aug 2023 11:07:37 -0700
Subject: [PATCH 9/9] roachprod: better determination if scp -R flag can be
 used

When uploading a file to a cluster, we use the "tree dist" algorithm by
default.
This uploads the file to a single node, then we copy the file from that
node to the other nodes (up to 10). This only makes sense if the
remote-to-remote transfers can happen directly, which only happens if we
pass the `-R -A` flags to `scp`. Unfortunately, older versions don't
support these flags.

Currently the flags are only passed if the OS is `darwin`. This commit
improves the determination: we run `ssh -V` (once) and check if the
`SSL` major version is three. For reference, some examples of what
`ssh -V` returns:
 - recent MacOSX: `OpenSSH_9.0p1, LibreSSL 3.3.6`
 - Ubuntu 22.04: `OpenSSH_8.9p1 Ubuntu-3ubuntu0.3, OpenSSL 3.0.2 15 Mar 2022`

In addition, if the version is not 3, we disable the use of "tree dist".

Epic: none
Release note: None
---
 pkg/roachprod/install/cluster_synced.go | 35 ++++++++++++++++++-------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/pkg/roachprod/install/cluster_synced.go b/pkg/roachprod/install/cluster_synced.go
index 975fd25b2f75..8cdfaa1cd610 100644
--- a/pkg/roachprod/install/cluster_synced.go
+++ b/pkg/roachprod/install/cluster_synced.go
@@ -22,7 +22,6 @@ import (
 	"os/exec"
 	"os/signal"
 	"path/filepath"
-	"runtime"
 	"strings"
 	"sync"
 	"syscall"
@@ -1750,9 +1749,12 @@ func (c *SyncedCluster) Put(
 	// not tested.
 	const treeDistFanout = 10
 
+	// If scp does not support the -R flag, treedist mode is slower as it ends up
+	// transferring everything through this machine anyway.
+	useTreeDist := c.UseTreeDist && sshVersion3()
 	var detail string
 	if !c.IsLocal() {
-		if c.UseTreeDist {
+		if useTreeDist {
 			detail = " (dist)"
 		} else {
 			detail = " (scp)"
@@ -1783,7 +1785,7 @@
 		}
 	}
 
-	if c.UseTreeDist {
+	if useTreeDist {
 		// In treedist mode, only add the local source initially.
 		pushSource(-1)
 	} else {
@@ -2452,6 +2454,24 @@ func (c *SyncedCluster) SSH(ctx context.Context, l *logger.Logger, sshArgs, args
 	return syscall.Exec(sshPath, allArgs, os.Environ())
 }
 
+var sshVersion3Internal struct {
+	value bool
+	once  sync.Once
+}
+
+// sshVersion3 returns true if ssh uses an SSL library at major version 3.
+func sshVersion3() bool {
+	sshVersion3Internal.once.Do(func() {
+		cmd := exec.Command("ssh", "-V")
+		out, err := cmd.CombinedOutput()
+		if err != nil {
+			panic(fmt.Sprintf("error running ssh -V: %v\nOutput:\n%s", err, string(out)))
+		}
+		sshVersion3Internal.value = strings.Contains(string(out), "SSL 3.")
+	})
+	return sshVersion3Internal.value
+}
+
 // scp return type conforms to what runWithMaybeRetry expects. A nil error
 // is always returned here since the only error that can happen is an scp error
 // which we do want to be able to retry.
@@ -2462,12 +2482,9 @@ func scp(l *logger.Logger, src, dest string) (*RunResultDetails, error) {
 		"-o", "StrictHostKeyChecking=no",
 		"-o", "ConnectTimeout=10",
 	}
-	if runtime.GOOS == "darwin" {
-		// SSH to src node and excute SCP there using agent-forwarding,
-		// as these are not the defaults on MacOS.
-		// TODO(sarkesian): Rather than checking Darwin, it would be preferable
-		// to check the output of `ssh-V` and check the version to see what flags
-		// are supported.
+	if sshVersion3() {
+		// Have scp do a direct transfer between two remote hosts (SSH to src node
+		// and execute SCP there using agent-forwarding).
		args = append(args, "-R", "-A")
 	}
 	args = append(args, sshAuthArgs()...)