From 6b89733c29ecd0e9161a7b1d30adb8a897cd7a28 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 24 Aug 2021 07:53:58 +0000 Subject: [PATCH] roachtest: increase consistency check timeout, and ignore errors This bumps the consistency check timeout to 5 minutes. There are indications that a recent libpq upgrade unmasked previously ignored context cancellation errors, caused by the timeout here being too low. It also ignores errors during the consistency check, since it is best-effort anyway. Release justification: non-production code changes Release note: None --- pkg/cmd/roachtest/cluster.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index a6e5d0568eca..1203c253c439 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -1266,7 +1266,7 @@ func (c *clusterImpl) CheckReplicaDivergenceOnDB( // // We've seen the consistency checks hang indefinitely in some cases. rows, err := db.QueryContext(ctx, ` -SET statement_timeout = '3m'; +SET statement_timeout = '5m'; SELECT t.range_id, t.start_key_pretty, t.status, t.detail FROM crdb_internal.check_consistency(true, '', '') as t @@ -1278,20 +1278,22 @@ WHERE t.status NOT IN ('RANGE_CONSISTENT', 'RANGE_INDETERMINATE')`) l.Printf("consistency check failed with %v; ignoring", err) return nil } + defer rows.Close() var finalErr error for rows.Next() { var rangeID int32 var prettyKey, status, detail string if scanErr := rows.Scan(&rangeID, &prettyKey, &status, &detail); scanErr != nil { - return scanErr + l.Printf("consistency check failed with %v; ignoring", scanErr) + return nil } finalErr = errors.CombineErrors(finalErr, errors.Newf("r%d (%s) is inconsistent: %s %s\n", rangeID, prettyKey, status, detail)) } if err := rows.Err(); err != nil { - finalErr = errors.CombineErrors(finalErr, err) + l.Printf("consistency check failed with %v; ignoring", err) + return nil } - return finalErr } @@ -1330,7 +1332,7 @@ func (c *clusterImpl) FailOnReplicaDivergence(ctx context.Context, t test.Test) defer db.Close() if err := contextutil.RunWithTimeout( - ctx, "consistency check", time.Minute, + ctx, "consistency check", 5*time.Minute, func(ctx context.Context) error { return c.CheckReplicaDivergenceOnDB(ctx, t.L(), db) },