From a02c502c99ad709dba1b30033ffc2f53a2ebddd5 Mon Sep 17 00:00:00 2001
From: Daniel Nephin <dnephin@gmail.com>
Date: Fri, 14 May 2021 17:56:19 -0400
Subject: [PATCH 1/5] ci: update config

- test against fewer go versions, only the last 2 major versions of
  Go are supported so we can run tests against only those two
- only test against version with 32bit
- remove the CircleCI module cache. With the new module proxy it is faster to
  download from the proxy.

Also refactor the config a bit to:

- remove duplication in the go-test jobs (we can specify GOARCH as a
  parameter)
- use the same executor for both jobs, so that the docker registry url
  can be specified in a single place
- remove the need for any yaml anchors and aliases
---
 .circleci/config.yml | 147 ++++++++++++++-----------------------------
 1 file changed, 47 insertions(+), 100 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 97400b6bd59..089a754ddd2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,38 +1,47 @@
 version: 2.1
 
-references:
-  images:
-    go-1.13: &GOLANG_1_13_IMAGE docker.mirror.hashicorp.services/circleci/golang:1.13
-    go-1.14: &GOLANG_1_14_IMAGE docker.mirror.hashicorp.services/circleci/golang:1.14
-    go-1.15: &GOLANG_1_15_IMAGE docker.mirror.hashicorp.services/circleci/golang:1.15
-    go-1.16: &GOLANG_1_16_IMAGE docker.mirror.hashicorp.services/circleci/golang:1.16
+workflows:
+  ci:
+    jobs:
+      - lint
+      - go-test:
+          name: test go1.15
+          version: "1.15"
+      - go-test:
+          name: test go1.16
+          version: "1.16"
+      - go-test:
+          name: test go1.16 32bit
+          version: "1.16"
+          goarch: "386"
+          target: ci.test-norace
 
-  environment: &ENVIRONMENT
-    TEST_RESULTS_DIR: &TEST_RESULTS_DIR /tmp/test-results # path to where test results are saved
-    GOTRACEBACK: "all"
-    GO111MODULE: "on"
-    GOMAXPROCS: 2
+executors:
+  golang:
+    parameters:
+      version:
+        type: string
+      goarch:
+        type: string
+        default: amd64
+    docker:
+      - image: docker.mirror.hashicorp.services/circleci/golang:<<parameters.version>>
+    environment:
+      TEST_RESULTS_DIR: /tmp/test-results
+      GOTRACEBACK: "all"
+      GO111MODULE: "on"
+      GOMAXPROCS: 2
+      GOARCH: <<parameters.goarch>>
 
 jobs:
-  go-fmt-and-vet:
-    docker:
-      - image: *GOLANG_1_14_IMAGE
+  lint:
+    executor:
+      name: golang
+      version: "1.16"
     steps:
       - checkout
-
-      # Restore go module cache if there is one
-      - restore_cache:
-          keys:
-            - raft-modcache-v1-{{ checksum "go.mod" }}
-
       - run: go mod download
 
-      # Save go module cache if the go.mod file has changed
-      - save_cache:
-          key: raft-modcache-v1-{{ checksum "go.mod" }}
-          paths:
-            - "/go/pkg/mod"
-
       # check go fmt output because it does not report non-zero when there are fmt changes
       - run:
           name: check go fmt
@@ -48,87 +57,25 @@ jobs:
           go vet $PACKAGE_NAMES
 
   go-test:
-    description: Runs go tests
     parameters:
-      go-version:
-        description: what go version to use
+      version:
         type: string
-    docker:
-      - image: << parameters.go-version >>
-    environment: *ENVIRONMENT
-    steps:
-      - checkout
-      - run: mkdir -p $TEST_RESULTS_DIR
-
-      - restore_cache: # restore cache
-          keys:
-            - raft-modcache-v1-{{ checksum "go.mod" }}
-
-      # run go tests with gotestsum
-      - run: make ci.integ
-      - store_test_results:
-          path: /tmp/test-results
-      - store_artifacts:
-          path: /tmp/test-results
-
-  go-test-32bit:
-    description: Runs go tests - 32 bit
-    parameters:
-      go-version:
-        description: what go version to use
+      goarch:
         type: string
-    docker:
-      - image: << parameters.go-version >>
-    environment:
-      <<: *ENVIRONMENT
-      GOARCH: "386"
+        default: amd64
+      target:
+        type: string
+        default: ci.integ
+    executor:
+      name: golang
+      version: <<parameters.version>>
+      goarch: <<parameters.goarch>>
     steps:
+      - run: go env
       - checkout
       - run: mkdir -p $TEST_RESULTS_DIR
-
-      - restore_cache: # restore cache
-          keys:
-            - raft-modcache-v1-{{ checksum "go.mod" }}
-
-      # run go tests with gotestsum
-      - run: make ci.test-norace
+      - run: make <<parameters.target>>
       - store_test_results:
           path: /tmp/test-results
       - store_artifacts:
           path: /tmp/test-results
-
-workflows:
-  version: 2
-  test-and-build:
-    jobs:
-      - go-fmt-and-vet
-      - go-test:
-          name: test go1.13
-          go-version: *GOLANG_1_13_IMAGE
-          requires:
-            - go-fmt-and-vet
-      - go-test:
-          name: test go1.14
-          go-version: *GOLANG_1_14_IMAGE
-          requires:
-            - go-fmt-and-vet
-      - go-test:
-          name: test go1.15
-          go-version: *GOLANG_1_15_IMAGE
-          requires:
-            - go-fmt-and-vet
-      - go-test:
-          name: test go1.16
-          go-version: *GOLANG_1_16_IMAGE
-          requires:
-            - go-fmt-and-vet
-      - go-test-32bit:
-          name: test go1.13 - 32bit
-          go-version: *GOLANG_1_13_IMAGE
-          requires:
-            - go-fmt-and-vet
-      - go-test-32bit:
-          name: test go1.14 - 32bit
-          go-version: *GOLANG_1_14_IMAGE
-          requires:
-            - go-fmt-and-vet

From df671c75a57af710c0d99918865aa7708ef327a3 Mon Sep 17 00:00:00 2001
From: Daniel Nephin <dnephin@gmail.com>
Date: Fri, 14 May 2021 18:24:35 -0400
Subject: [PATCH 2/5] Fix new 'go vet' errors reported by Go1.16

Calling Fatalf in a goroutine is a problem because Fatalf calls FailNow, which calls runtime.Goexit.
Goexit stops the goroutine, but nothing else. When called from a goroutine that is not the main test
Goroutine this does not have the desired affect. It does not stop the test from running.

Use Errorf and return for now. In the future we may want to replace these with a channel that sends
errors back to the main goroutine.
---
 net_transport_test.go | 39 +++++++++++++++++++++++++--------------
 transport_test.go     | 24 +++++++++++++++---------
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/net_transport_test.go b/net_transport_test.go
index 77b1825ff50..d42d469db78 100644
--- a/net_transport_test.go
+++ b/net_transport_test.go
@@ -3,8 +3,6 @@ package raft
 import (
 	"bytes"
 	"fmt"
-	"github.com/hashicorp/go-hclog"
-	"github.com/stretchr/testify/require"
 	"net"
 	"reflect"
 	"strings"
@@ -12,6 +10,9 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
+
+	"github.com/hashicorp/go-hclog"
+	"github.com/stretchr/testify/require"
 )
 
 type testAddrProvider struct {
@@ -60,7 +61,8 @@ func TestNetworkTransport_CloseStreams(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*AppendEntriesRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 				rpc.Respond(&resp, nil)
 
@@ -220,13 +222,14 @@ func TestNetworkTransport_AppendEntries(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*AppendEntriesRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
 			}
 		}()
 
@@ -290,12 +293,14 @@ func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) {
 					// Verify the command
 					req := rpc.Command.(*AppendEntriesRequest)
 					if !reflect.DeepEqual(req, &args) {
-						t.Fatalf("command mismatch: %#v %#v", *req, args)
+						t.Errorf("command mismatch: %#v %#v", *req, args)
+						return
 					}
 					rpc.Respond(&resp, nil)
 
 				case <-time.After(200 * time.Millisecond):
-					t.Fatalf("timeout")
+					t.Errorf("timeout")
+					return
 				}
 			}
 		}()
@@ -376,7 +381,8 @@ func TestNetworkTransport_AppendEntriesPipeline_CloseStreams(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*AppendEntriesRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 				rpc.Respond(&resp, nil)
 
@@ -472,13 +478,15 @@ func TestNetworkTransport_RequestVote(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*RequestVoteRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
+				return
 			}
 		}()
 
@@ -533,7 +541,8 @@ func TestNetworkTransport_InstallSnapshot(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*InstallSnapshotRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 
 				// Try to read the bytes
@@ -542,13 +551,14 @@ func TestNetworkTransport_InstallSnapshot(t *testing.T) {
 
 				// Compare
 				if bytes.Compare(buf, []byte("0123456789")) != 0 {
-					t.Fatalf("bad buf %v", buf)
+					t.Errorf("bad buf %v", buf)
+					return
 				}
 
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
 			}
 		}()
 
@@ -647,7 +657,8 @@ func TestNetworkTransport_PooledConn(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*AppendEntriesRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 				rpc.Respond(&resp, nil)
 
diff --git a/transport_test.go b/transport_test.go
index 91d51e95f3f..5a59253df50 100644
--- a/transport_test.go
+++ b/transport_test.go
@@ -66,12 +66,13 @@ func TestTransport_AppendEntries(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*AppendEntriesRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
 			}
 		}()
 
@@ -129,12 +130,14 @@ func TestTransport_AppendEntriesPipeline(t *testing.T) {
 					// Verify the command
 					req := rpc.Command.(*AppendEntriesRequest)
 					if !reflect.DeepEqual(req, &args) {
-						t.Fatalf("command mismatch: %#v %#v", *req, args)
+						t.Errorf("command mismatch: %#v %#v", *req, args)
+						return
 					}
 					rpc.Respond(&resp, nil)
 
 				case <-time.After(200 * time.Millisecond):
-					t.Fatalf("timeout")
+					t.Errorf("timeout")
+					return
 				}
 			}
 		}()
@@ -198,13 +201,14 @@ func TestTransport_RequestVote(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*RequestVoteRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
 			}
 		}()
 
@@ -254,7 +258,8 @@ func TestTransport_InstallSnapshot(t *testing.T) {
 				// Verify the command
 				req := rpc.Command.(*InstallSnapshotRequest)
 				if !reflect.DeepEqual(req, &args) {
-					t.Fatalf("command mismatch: %#v %#v", *req, args)
+					t.Errorf("command mismatch: %#v %#v", *req, args)
+					return
 				}
 
 				// Try to read the bytes
@@ -263,13 +268,14 @@ func TestTransport_InstallSnapshot(t *testing.T) {
 
 				// Compare
 				if bytes.Compare(buf, []byte("0123456789")) != 0 {
-					t.Fatalf("bad buf %v", buf)
+					t.Errorf("bad buf %v", buf)
+					return
 				}
 
 				rpc.Respond(&resp, nil)
 
 			case <-time.After(200 * time.Millisecond):
-				t.Fatalf("timeout")
+				t.Errorf("timeout")
 			}
 		}()
 

From e60cb687f078a86625aaf4a97d7fef9c59a62610 Mon Sep 17 00:00:00 2001
From: Daniel Nephin <dnephin@gmail.com>
Date: Fri, 14 May 2021 19:17:29 -0400
Subject: [PATCH 3/5] Only run Test_RecoverCluster 3 times

We really only care about 3 cases, so make that explicit
---
 raft_test.go | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/raft_test.go b/raft_test.go
index 03837b31239..5986246533a 100644
--- a/raft_test.go
+++ b/raft_test.go
@@ -120,13 +120,12 @@ func TestRaft_RecoverCluster_NoState(t *testing.T) {
 }
 
 func TestRaft_RecoverCluster(t *testing.T) {
-	// Run with different number of applies which will cover no snapshot and
-	// snapshot + log scenarios. By sweeping through the trailing logs value
-	// we will also hit the case where we have a snapshot only.
-	var err error
+	snapshotThreshold := 5
 	runRecover := func(t *testing.T, applies int) {
+		var err error
 		conf := inmemConfig(t)
 		conf.TrailingLogs = 10
+		conf.SnapshotThreshold = uint64(snapshotThreshold)
 		c := MakeCluster(3, t, conf)
 		defer c.Close()
 
@@ -212,11 +211,16 @@ func TestRaft_RecoverCluster(t *testing.T) {
 		c.EnsureSame(t)
 		c.EnsureSamePeers(t)
 	}
-	for applies := 0; applies < 10; applies++ {
-		t.Run(fmt.Sprintf("%d applies", applies), func(t *testing.T) {
-			runRecover(t, applies)
-		})
-	}
+
+	t.Run("no snapshot, no trailing logs", func(t *testing.T) {
+		runRecover(t, 0)
+	})
+	t.Run("snapshot", func(t *testing.T) {
+		runRecover(t, snapshotThreshold)
+	})
+	t.Run("snapshot, with trailing logs", func(t *testing.T) {
+		runRecover(t, snapshotThreshold+20)
+	})
 }
 
 func TestRaft_HasExistingState(t *testing.T) {

From 0b418e9681d308ced406ece8b19d5efd0783898e Mon Sep 17 00:00:00 2001
From: Daniel Nephin <dnephin@gmail.com>
Date: Fri, 14 May 2021 19:27:45 -0400
Subject: [PATCH 4/5] ci: capture all test runs

Previously we were running the test suite twice, once for batch and one
without. The second run was clobbering the junit.xml file, which meant
that CircleCI would only see the data for half the tests.

This commit also removes the ci targets out of the makefile and into CI.
Putting these targets in the makefile hides problems like this. The
targets are already tightly coupled to the CI config (because they
require env vars defined in the CI config), so there's no advantage to
putting them into a makefile.
---
 .circleci/config.yml | 16 ++++++++++++----
 Makefile             | 12 ------------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 089a754ddd2..6bb94cf5493 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -14,7 +14,7 @@ workflows:
           name: test go1.16 32bit
           version: "1.16"
           goarch: "386"
-          target: ci.test-norace
+          args: "" # remove -race
 
 executors:
   golang:
@@ -63,9 +63,9 @@ jobs:
       goarch:
         type: string
         default: amd64
-      target:
+      args:
         type: string
-        default: ci.integ
+        default: "-race"
     executor:
       name: golang
       version: <<parameters.version>>
@@ -74,7 +74,15 @@ jobs:
       - run: go env
       - checkout
       - run: mkdir -p $TEST_RESULTS_DIR
-      - run: make <<parameters.target>>
+      - run:
+          name: run tests
+          environment:
+            INTEG_TESTS: "yes"
+            GOTESTSUM_FORMAT: short-verbose
+          command: |
+            gotestsum --junitfile ${TEST_RESULTS_DIR}/junit.xml       -- -timeout=240s <<parameters.args>> .
+            gotestsum --junitfile ${TEST_RESULTS_DIR}/junit-batch.xml -- -timeout=240s <<parameters.args>> -tags batchtest .
+
       - store_test_results:
           path: /tmp/test-results
       - store_artifacts:
diff --git a/Makefile b/Makefile
index e1f7bd49c13..c988f0ab2f4 100644
--- a/Makefile
+++ b/Makefile
@@ -23,18 +23,6 @@ integ: test
 	INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -run=Integ .
 	INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -tags batchtest -run=Integ .
 
-ci.test-norace:
-	gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s
-	gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -tags batchtest
-
-ci.test:
-	gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race .
-	gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race -tags batchtest .
-
-ci.integ: ci.test
-	INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ .
-	INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ -tags batchtest .
-
 fuzz:
 	cd ./fuzzy && go test $(TESTARGS) -timeout=20m .
 	cd ./fuzzy && go test $(TESTARGS) -timeout=20m -tags batchtest .

From f93d7eb351840b8c87f58da775ed32d287f5e4cb Mon Sep 17 00:00:00 2001
From: Daniel Nephin <dnephin@gmail.com>
Date: Mon, 17 May 2021 14:40:38 -0400
Subject: [PATCH 5/5] Update a case in TestRaft_RecoverCluster

To cover the proper case.
---
 raft_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/raft_test.go b/raft_test.go
index 5986246533a..51db44a8219 100644
--- a/raft_test.go
+++ b/raft_test.go
@@ -215,8 +215,8 @@ func TestRaft_RecoverCluster(t *testing.T) {
 	t.Run("no snapshot, no trailing logs", func(t *testing.T) {
 		runRecover(t, 0)
 	})
-	t.Run("snapshot", func(t *testing.T) {
-		runRecover(t, snapshotThreshold)
+	t.Run("no snapshot, some trailing logs", func(t *testing.T) {
+		runRecover(t, snapshotThreshold-1)
 	})
 	t.Run("snapshot, with trailing logs", func(t *testing.T) {
 		runRecover(t, snapshotThreshold+20)