Skip to content

Commit

Permalink
Merge pull request #14687 from serathius/random-failpoint
Browse files Browse the repository at this point in the history
tests: Add triggering random go failpoints to linearizability tests
  • Loading branch information
serathius authored Nov 7, 2022
2 parents f64bed6 + 219278b commit 554b1bd
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 15 deletions.
83 changes: 76 additions & 7 deletions tests/linearizability/failpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,40 @@ import (
"math/rand"
"net/http"
"net/url"
"strings"
"time"

"go.uber.org/zap"

clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/tests/v3/framework/e2e"
)

// Failpoints available to the linearizability tests. Each goFailpoint literal
// lists the failpoint name, the payload and, in the three-field form, an
// optional trigger function (nil when none is supplied).
// NOTE(review): KillFailpoint and RaftBeforeSavePanic are each declared twice
// below, and one RaftBeforeSavePanic literal has only two fields. This looks
// like removed and added diff lines shown together — confirm which
// declarations are current.
var (
KillFailpoint Failpoint = killFailpoint{}
RaftBeforeSavePanic Failpoint = goFailpoint{"etcdserver/raftBeforeSave", "panic"}
KillFailpoint Failpoint = killFailpoint{}
// The two defrag failpoints pass triggerDefrag as their trigger function.
DefragBeforeCopyPanic Failpoint = goFailpoint{"backend/defragBeforeCopy", "panic", triggerDefrag}
DefragBeforeRenamePanic Failpoint = goFailpoint{"backend/defragBeforeRename", "panic", triggerDefrag}
BeforeCommitPanic Failpoint = goFailpoint{"backend/beforeCommit", "panic", nil}
AfterCommitPanic Failpoint = goFailpoint{"backend/afterCommit", "panic", nil}
RaftBeforeSavePanic Failpoint = goFailpoint{"etcdserver/raftBeforeSave", "panic", nil}
RaftAfterSavePanic Failpoint = goFailpoint{"etcdserver/raftAfterSave", "panic", nil}
// RandomFailpoint wraps the failpoints listed here; its Trigger method picks
// one of them at random on every call.
RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{
KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic,
RaftAfterSavePanic, DefragBeforeCopyPanic, DefragBeforeRenamePanic,
}}
// TODO: Figure out how to reliably trigger below failpoints and add them to RandomFailpoint
raftBeforeLeaderSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeLeaderSend", "panic", nil}
raftBeforeApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeApplySnap", "panic", nil}
raftAfterApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterApplySnap", "panic", nil}
raftAfterWALReleasePanic Failpoint = goFailpoint{"etcdserver/raftAfterWALRelease", "panic", nil}
raftBeforeFollowerSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeFollowerSend", "panic", nil}
raftBeforeSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeSaveSnap", "panic", nil}
raftAfterSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterSaveSnap", "panic", nil}
)

// Failpoint is a fault that the linearizability tests can inject into a
// running etcd process cluster.
type Failpoint interface {
	// Name returns the identifier used when reporting on this failpoint.
	Name() string
	// Trigger injects the fault into clus and returns an error when the
	// injection did not succeed.
	Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster) error
}

type killFailpoint struct{}
Expand All @@ -54,30 +76,41 @@ func (f killFailpoint) Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster
return nil
}

// Name returns the fixed identifier "Kill" for this failpoint.
func (killFailpoint) Name() string { return "Kill" }

// goFailpoint describes a go failpoint that is set up on a cluster member.
// Field order matters: the package-level failpoint variables build values
// with positional composite literals.
type goFailpoint struct {
	// failpoint is the failpoint's name (for example "backend/beforeCommit");
	// payload is the value configured for it (for example "panic").
	failpoint, payload string
	// trigger, when non-nil, is called with the selected member once the
	// failpoint has been set up.
	trigger func(ctx context.Context, member e2e.EtcdProcess) error
}

// Trigger sets up the go failpoint on one randomly chosen cluster member,
// runs the optional f.trigger hook against that member, calls Wait on the
// member process and then starts it again. Any error from these steps is
// returned to the caller.
// NOTE(review): this listing appears to interleave removed and added diff
// lines (two `err :=` assignments, two consecutive returns, Wait/Start on
// both clus.Procs[0] and member) — confirm which statements are current.
func (f goFailpoint) Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster) error {
// Pick a random member; the failpoint endpoint is addressed on localhost
// using the member's GoFailPort.
member := clus.Procs[rand.Int()%len(clus.Procs)]
address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort)
err := triggerGoFailpoint(address, f.failpoint, f.payload)
err := setupGoFailpoint(address, f.failpoint, f.payload)
if err != nil {
return fmt.Errorf("failed to trigger failpoint %q, err: %v", f.failpoint, err)
return fmt.Errorf("gofailpoint setup failed: %w", err)
}
err = clus.Procs[0].Wait()
// Some failpoints need an explicit action to be hit; run it when provided.
if f.trigger != nil {
err = f.trigger(ctx, member)
if err != nil {
return fmt.Errorf("triggering gofailpoint failed: %w", err)
}
}
// presumably Wait returns once the member process has exited — TODO confirm
err = member.Wait()
if err != nil {
return err
}
err = clus.Procs[0].Start(ctx)
err = member.Start(ctx)
if err != nil {
return err
}
return nil
}

func triggerGoFailpoint(host, failpoint, payload string) error {
func setupGoFailpoint(host, failpoint, payload string) error {
failpointUrl := url.URL{
Scheme: "http",
Host: host,
Expand All @@ -98,6 +131,42 @@ func triggerGoFailpoint(host, failpoint, payload string) error {
return nil
}

// Name reports the failpoint's name as stored in the failpoint field.
func (f goFailpoint) Name() string { return f.failpoint }

// triggerDefrag creates a short-lived clientv3 client for member and calls
// Defragment on the member's first v3 endpoint. A client-creation failure is
// returned wrapped; a Defragment error is returned as-is unless its message
// contains "error reading from server: EOF", which is treated as success.
func triggerDefrag(ctx context.Context, member e2e.EtcdProcess) error {
	cfg := clientv3.Config{
		Endpoints:            member.EndpointsV3(),
		Logger:               zap.NewNop(),
		DialKeepAliveTime:    1 * time.Millisecond,
		DialKeepAliveTimeout: 5 * time.Millisecond,
	}
	client, err := clientv3.New(cfg)
	if err != nil {
		return fmt.Errorf("failed creating client: %w", err)
	}
	defer client.Close()

	if _, err = client.Defragment(ctx, member.EndpointsV3()[0]); err == nil {
		return nil
	}
	// NOTE(review): presumably the EOF shows up when the failpoint takes the
	// server down mid-request — confirm.
	if strings.Contains(err.Error(), "error reading from server: EOF") {
		return nil
	}
	return err
}

// httpClient is a package-level HTTP client with a 10 millisecond timeout.
// NOTE(review): presumably used for the failpoint HTTP requests — confirm
// against setupGoFailpoint.
var httpClient = http.Client{Timeout: 10 * time.Millisecond}

// randomFailpoint is a Failpoint that delegates to one of several candidate
// failpoints, chosen at random each time it is triggered.
type randomFailpoint struct {
	// failpoints holds the candidates to choose from.
	failpoints []Failpoint
}

// Trigger picks one of the configured failpoints uniformly at random, prints
// its name to standard output and delegates to that failpoint's Trigger,
// returning its result. It returns an error when no failpoints are
// configured.
func (f randomFailpoint) Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster) error {
	// Guard first: selecting from an empty list would panic (modulo by zero
	// in the original form, and rand.Intn(0) panics as well).
	if len(f.failpoints) == 0 {
		return fmt.Errorf("random failpoint has no failpoints to choose from")
	}
	failpoint := f.failpoints[rand.Intn(len(f.failpoints))]
	fmt.Printf("Triggering %v failpoint\n", failpoint.Name())
	return failpoint.Trigger(ctx, clus)
}

// Name returns the fixed identifier "Random"; the failpoint actually chosen
// is only known once Trigger runs.
func (randomFailpoint) Name() string { return "Random" }
19 changes: 11 additions & 8 deletions tests/linearizability/linearizability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,19 @@ func TestLinearizability(t *testing.T) {
config e2e.EtcdProcessClusterConfig
}{
{
name: "KillClusterOfSize1",
failpoint: KillFailpoint,
name: "ClusterOfSize1",
failpoint: RandomFailpoint,
config: e2e.EtcdProcessClusterConfig{
ClusterSize: 1,
ClusterSize: 1,
GoFailEnabled: true,
},
},
{
name: "KillClusterOfSize3",
failpoint: KillFailpoint,
name: "ClusterOfSize3",
failpoint: RandomFailpoint,
config: e2e.EtcdProcessClusterConfig{
ClusterSize: 3,
ClusterSize: 3,
GoFailEnabled: true,
},
},
{
Expand Down Expand Up @@ -96,7 +98,7 @@ func testLinearizability(ctx context.Context, t *testing.T, config e2e.EtcdProce
ctx, cancel := context.WithCancel(ctx)
go func() {
defer cancel()
err := triggerFailpoints(ctx, clus, failpoint)
err := triggerFailpoints(ctx, t, clus, failpoint)
if err != nil {
t.Error(err)
}
Expand All @@ -120,14 +122,15 @@ func testLinearizability(ctx context.Context, t *testing.T, config e2e.EtcdProce
t.Logf("saving visualization to %q", path)
}

func triggerFailpoints(ctx context.Context, clus *e2e.EtcdProcessCluster, config FailpointConfig) error {
func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) error {
var err error
successes := 0
failures := 0
time.Sleep(config.waitBetweenTriggers)
for successes < config.count && failures < config.count {
err = config.failpoint.Trigger(ctx, clus)
if err != nil {
t.Logf("Failed to trigger failpoint %q, err: %v\n", config.failpoint.Name(), err)
failures++
continue
}
Expand Down

0 comments on commit 554b1bd

Please sign in to comment.