txsource: Shut down nodes for longer period of time
ptrus committed Sep 22, 2020
1 parent 4dd069d commit 2e00142
Showing 11 changed files with 221 additions and 109 deletions.
9 changes: 5 additions & 4 deletions .buildkite/scripts/daily_txsource.sh
@@ -11,10 +11,11 @@ if [[ $BUILDKITE_RETRY_COUNT == 0 ]]; then
     --metrics.labels instance=$BUILDKITE_PIPELINE_NAME-$BUILDKITE_BUILD_NUMBER \
     --scenario e2e/runtime/txsource-multi
 else
-  curl -H "Content-Type: application/json" \
-    -X POST \
-    --data "{\"text\": \"Daily transaction source tests failure\"}" \
-    "$SLACK_WEBHOOK_URL"
+  # TODO: uncomment.
+  # curl -H "Content-Type: application/json" \
+  #   -X POST \
+  #   --data "{\"text\": \"Daily transaction source tests failure\"}" \
+  #   "$SLACK_WEBHOOK_URL"
 
   # Exit with non-zero exit code, so that the buildkite build will be
   # marked as failed.
1 change: 1 addition & 0 deletions .changelog/3223.internal.md
@@ -0,0 +1 @@
+ci/txsource: Shut down nodes for longer period of time
17 changes: 4 additions & 13 deletions go/oasis-node/cmd/debug/txsource/workload/queries.go
@@ -57,10 +57,6 @@ const (
 	// in a single iteration. The purpose of this timeout is to prevent the client being stuck and
 	// treating that as an error instead.
 	queriesIterationTimeout = 60 * time.Second
-
-	// queriesMaxConsecutiveRetries is the maximum number of consecutive retries for unavailable
-	// nodes.
-	queriesMaxConsecutiveRetries = 30
 )
 
 // QueriesFlags are the queries workload flags.
@@ -775,33 +771,28 @@ func (q *queries) Run(gracefulExit context.Context, rng *rand.Rand, conn *grpc.C
 		}
 	}
 
-	var retries int
 	for {
-		if retries > queriesMaxConsecutiveRetries {
-			return fmt.Errorf("too many consecutive retries")
-		}
-
 		loopCtx, cancel := context.WithTimeout(ctx, queriesIterationTimeout)
 
 		err := q.doQueries(loopCtx, rng)
 		cancel()
 		switch {
 		case err == nil:
-			retries = 0
 		case cmnGrpc.IsErrorCode(err, codes.Unavailable):
-			// Don't immediately fail when the node is unavailable as it may be restarting.
+			// Don't fail when the node is unavailable as it may be restarting.
+			// If the node was shutdown unexpectedly the test runner will fail
+			// the test.
 			q.logger.Warn("node unavailable, retrying",
 				"err", err,
 			)
-			retries++
 		default:
 			return err
 		}
 
 		select {
 		case <-time.After(1 * time.Second):
 		case <-gracefulExit.Done():
-			oversizedLogger.Debug("time's up")
+			q.logger.Debug("time's up")
 			return nil
 		}
 	}
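With nodes now deliberately kept down for long stretches, the workload no longer caps consecutive retries: it polls until the node comes back, and the test runner is what fails the run if a shutdown was not scheduled. Below is a minimal standalone sketch of that retry pattern, using plain grpc-go status checks in place of the repository's cmnGrpc.IsErrorCode helper and a hypothetical doQueries stub; the loop is bounded only so the example terminates.

package main

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// doQueries stands in for the workload's query batch (hypothetical stub);
// it always reports the node as unavailable to exercise the retry path.
func doQueries(ctx context.Context) error {
	return status.Error(codes.Unavailable, "node restarting")
}

func main() {
	ctx := context.Background()
	const iterationTimeout = 60 * time.Second

	for attempt := 0; attempt < 3; attempt++ { // bounded only for the demo
		loopCtx, cancel := context.WithTimeout(ctx, iterationTimeout)
		err := doQueries(loopCtx)
		cancel()

		switch {
		case err == nil:
			// Success; with the retry cap removed there is no counter to reset.
		case status.Code(err) == codes.Unavailable:
			// Keep retrying: the node may be down for a long, planned shutdown.
			// An unexpected shutdown is caught by the test runner instead.
			fmt.Println("node unavailable, retrying:", err)
		default:
			fmt.Println("fatal:", err)
			return
		}
		time.Sleep(1 * time.Second)
	}
}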
12 changes: 11 additions & 1 deletion go/oasis-test-runner/oasis/oasis.go
@@ -152,10 +152,20 @@ func (n *Node) Stop() error {
 }
 
 // Restart kills the node, waits for it to stop, and starts it again.
-func (n *Node) Restart() error {
+func (n *Node) Restart(ctx context.Context) error {
+	return n.RestartAfter(ctx, 0)
+}
+
+// RestartAfter kills the node, waits for it to stop, and starts it again after delay.
+func (n *Node) RestartAfter(ctx context.Context, startDelay time.Duration) error {
 	if err := n.stopNode(); err != nil {
 		return err
 	}
+	select {
+	case <-time.After(startDelay):
+	case <-ctx.Done():
+		return ctx.Err()
+	}
 	return n.doStartNode()
 }
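RestartAfter is the piece that lets scenarios keep a node down for a configurable period before bringing it back, while still honoring test cancellation through the context. A hypothetical usage sketch (the helper name, the *oasis.Compute parameter, and the five-minute delay are illustration-only assumptions, not part of this diff):

import (
	"context"
	"fmt"
	"time"

	"github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis"
)

// restartComputeWorker is a hypothetical helper: stop the node and start it
// again after five minutes, aborting early if the test context is canceled.
func restartComputeWorker(ctx context.Context, computeWorker *oasis.Compute) error {
	if err := computeWorker.RestartAfter(ctx, 5*time.Minute); err != nil {
		return fmt.Errorf("failed to restart compute worker: %w", err)
	}
	return nil
}

Implementing the delay as a select over time.After and ctx.Done() keeps a long shutdown interruptible, instead of blocking the whole test on a bare time.Sleep.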
4 changes: 2 additions & 2 deletions go/oasis-test-runner/scenario/e2e/debond.go
@@ -59,7 +59,7 @@ func (s *debondImpl) Fixture() (*oasis.NetworkFixture, error) {
 			},
 			DebondingDelegations: map[staking.Address]map[staking.Address][]*staking.DebondingDelegation{
 				EntityAccount: {
-					LockupAccount: {
+					DeterministicValidator0: {
 						{
 							Shares:        *quantity.NewFromUint64(500),
 							DebondEndTime: 1,
@@ -90,7 +90,7 @@ func (s *debondImpl) Run(*env.Env) error {
 
 	// Beginning: lockup account has no funds.
 	lockupQuery := staking.OwnerQuery{
-		Owner:  LockupAccount,
+		Owner:  DeterministicValidator0,
 		Height: consensus.HeightLatest,
 	}
 	s.Logger.Info("checking balance at beginning")
18 changes: 9 additions & 9 deletions go/oasis-test-runner/scenario/e2e/runtime/gas_fees.go
@@ -46,47 +46,47 @@ func (sc *gasFeesRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) {
 			},
 			TotalSupply: *quantity.NewFromUint64(90000000),
 			Ledger: map[staking.Address]*staking.Account{
-				e2e.LockupAccount: {
+				e2e.DeterministicValidator0: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount0: {
+				e2e.DeterministicValidator1: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount1: {
+				e2e.DeterministicValidator2: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount2: {
+				e2e.DeterministicCompute0: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount3: {
+				e2e.DeterministicCompute1: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount4: {
+				e2e.DeterministicCompute2: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount5: {
+				e2e.DeterministicStorage0: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount6: {
+				e2e.DeterministicStorage1: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
 				},
-				e2e.MysteryAccount7: {
+				e2e.DeterministicKeyManager0: {
 					General: staking.GeneralAccount{
 						Balance: *quantity.NewFromUint64(10000000),
 					},
go/oasis-test-runner/scenario/e2e/runtime/keymanager_restart.go
@@ -35,6 +35,7 @@ func (sc *kmRestartImpl) Clone() scenario.Scenario {
 }
 
 func (sc *kmRestartImpl) Run(childEnv *env.Env) error {
+	ctx := context.Background()
 	clientErrCh, cmd, err := sc.runtimeImpl.start(childEnv)
 	if err != nil {
 		return err
@@ -55,7 +56,7 @@ func (sc *kmRestartImpl) Run(childEnv *env.Env) error {
 
 	// Restart the key manager.
 	sc.Logger.Info("restarting the key manager")
-	if err = km.Restart(); err != nil {
+	if err = km.Restart(ctx); err != nil {
 		return err
 	}
@@ -65,7 +66,7 @@ func (sc *kmRestartImpl) Run(childEnv *env.Env) error {
 	if err != nil {
 		return err
 	}
-	if err = kmCtrl.WaitReady(context.Background()); err != nil {
+	if err = kmCtrl.WaitReady(ctx); err != nil {
 		return err
 	}
7 changes: 4 additions & 3 deletions go/oasis-test-runner/scenario/e2e/runtime/node_shutdown.go
@@ -42,6 +42,7 @@ func (sc *nodeShutdownImpl) Fixture() (*oasis.NetworkFixture, error) {
 }
 
 func (sc *nodeShutdownImpl) Run(childEnv *env.Env) error {
+	ctx := context.Background()
 	var err error
 
 	if err = sc.Net.Start(); err != nil {
@@ -56,12 +57,12 @@ func (sc *nodeShutdownImpl) Run(childEnv *env.Env) error {
 	if err != nil {
 		return err
 	}
-	if err = nodeCtrl.WaitReady(context.Background()); err != nil {
+	if err = nodeCtrl.WaitReady(ctx); err != nil {
 		return err
 	}
 
 	// Make sure that the GetStatus endpoint returns sensible values.
-	status, err := nodeCtrl.GetStatus(context.Background())
+	status, err := nodeCtrl.GetStatus(ctx)
 	if err != nil {
 		return fmt.Errorf("failed to get status for node: %w", err)
 	}
@@ -89,7 +90,7 @@ func (sc *nodeShutdownImpl) Run(childEnv *env.Env) error {
 	}
 
 	// Try restarting it; it should shutdown by itself soon after.
-	if err = computeWorker.Restart(); err != nil {
+	if err = computeWorker.Restart(ctx); err != nil {
 		return err
 	}
 	err = <-computeWorker.Exit()
(3 more changed files not shown.)
