From a89275a19a1c98a451c8fd4f01dbb13c7df682a5 Mon Sep 17 00:00:00 2001 From: Bartek Nowotarski Date: Wed, 28 Oct 2020 18:19:40 +0100 Subject: [PATCH] services/horizon: Check state rebuild in state verification integration test (#3127) All integration tests start from empty state so we don't test building state from history archives. Extended state verification integration test to trigger state rebuild (`horizon expingest trigger-state-rebuild`) and verify rebuild state again. Such test would catch the bugs like #3100 and #3096. --- .circleci/config.yml | 2 +- .../protocol14_state_verifier_test.go | 59 +++++++++++++--- .../internal/test/integration/integration.go | 70 ++++++++++++++++++- 3 files changed, 116 insertions(+), 15 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9654b51405..4b42b6246f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -230,7 +230,7 @@ commands: command: | cd ~/go/src/github.com/stellar/go docker pull stellar/quickstart:testing - <<# parameters.enable-captive-core >>HORIZON_INTEGRATION_ENABLE_CAPTIVE_CORE=true<> go test -timeout 20m -v ./services/horizon/internal/integration/... + <<# parameters.enable-captive-core >>HORIZON_INTEGRATION_ENABLE_CAPTIVE_CORE=true<> go test -timeout 25m -v ./services/horizon/internal/integration/... #-----------------------------------------------------------------------------# # Jobs use the commands to accomplish a given task, and run through workflows # diff --git a/services/horizon/internal/integration/protocol14_state_verifier_test.go b/services/horizon/internal/integration/protocol14_state_verifier_test.go index daad7716db..b0c5cfa804 100644 --- a/services/horizon/internal/integration/protocol14_state_verifier_test.go +++ b/services/horizon/internal/integration/protocol14_state_verifier_test.go @@ -14,6 +14,12 @@ import ( "github.com/stretchr/testify/assert" ) +const ( + firstCheckpoint = (64 * (iota + 1)) - 1 + secondCheckpoint + thirdCheckpoint +) + func TestProtocol14StateVerifier(t *testing.T) { itest := integration.NewTest(t, protocol15Config) @@ -99,36 +105,67 @@ func TestProtocol14StateVerifier(t *testing.T) { assert.True(t, txResp.Successful) // Reach the first checkpoint ledger - for !itest.LedgerIngested(63) { - err := itest.CloseCoreLedger() - assert.NoError(t, err) - time.Sleep(50 * time.Millisecond) + // Core will push to history archives *after* checkpoint ledger + itest.CloseCoreLedgersUntilSequence(firstCheckpoint + 1) + assert.NoError(t, err) + for !itest.LedgerIngested(firstCheckpoint) { + time.Sleep(time.Second) + } + + verified := waitForStateVerifications(itest, 1) + if !verified { + t.Fatal("State verification not run...") + } + + // Trigger state rebuild to check if ingesting from history archive works + itest.RunHorizonCLICommand("expingest", "trigger-state-rebuild") + + // Wait for the second checkpoint ledger and state rebuild + // Core will push to history archives *after* checkpoint ledger + itest.CloseCoreLedgersUntilSequence(secondCheckpoint + 1) + assert.NoError(t, err) + + // Wait for the third checkpoint ledger and state verification trigger + // Core will push to history archives *after* checkpoint ledger + itest.CloseCoreLedgersUntilSequence(thirdCheckpoint + 1) + assert.NoError(t, err) + for !itest.LedgerIngested(thirdCheckpoint) { + time.Sleep(time.Second) } - var metrics string + verified = waitForStateVerifications(itest, 2) + if !verified { + t.Fatal("State verification not run...") + } +} +func waitForStateVerifications(itest *integration.Test, count int) bool { + t := itest.CurrentTest() // Check metrics until state verification run - for i := 0; i < 60; i++ { + for i := 0; i < 120; i++ { t.Logf("Checking metrics (%d attempt)\n", i) - res, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", itest.AdminPort())) + res, err := http.Get(itest.MetricsURL()) assert.NoError(t, err) metricsBytes, err := ioutil.ReadAll(res.Body) res.Body.Close() assert.NoError(t, err) - metrics = string(metricsBytes) + metrics := string(metricsBytes) stateInvalid := strings.Contains(metrics, "horizon_ingest_state_invalid 1") assert.False(t, stateInvalid, "State is invalid!") - notVerifiedYet := strings.Contains(metrics, "horizon_ingest_state_verify_duration_seconds_count 0") + notVerifiedYet := strings.Contains( + metrics, + fmt.Sprintf("horizon_ingest_state_verify_duration_seconds_count %d", count-1), + ) if notVerifiedYet { time.Sleep(time.Second) continue } - return + return true } - t.Fatal("State verification not run...") + return false } diff --git a/services/horizon/internal/test/integration/integration.go b/services/horizon/internal/test/integration/integration.go index 4acd3f0229..40b4a0231f 100644 --- a/services/horizon/internal/test/integration/integration.go +++ b/services/horizon/internal/test/integration/integration.go @@ -1,3 +1,4 @@ +//lint:file-ignore U1001 Ignore all unused code, this is only used in tests. package integration import ( @@ -333,18 +334,33 @@ func (i *Test) Client() *sdk.Client { // ingested by Horizon. Panics in case of errors. func (i *Test) LedgerIngested(sequence uint32) bool { root, err := i.Client().Root() - if err != nil { - panic(err) - } + panicIf(err) return root.IngestSequence >= sequence } +// LedgerClosed returns true if the ledger with a given sequence has been +// closed by Stellar-Core. Panics in case of errors. Note it's different +// than LedgerIngested because it checks if the ledger was closed, not +// necessarily ingested (ex. when rebuilding state Horizon does not ingest +// recent ledgers). +func (i *Test) LedgerClosed(sequence uint32) bool { + root, err := i.Client().Root() + panicIf(err) + + return root.CoreSequence >= int32(sequence) +} + // AdminPort returns Horizon admin port. func (i *Test) AdminPort() int { return 6060 } +// Metrics URL returns Horizon metrics URL. +func (i *Test) MetricsURL() string { + return fmt.Sprintf("http://localhost:%d/metrics", i.AdminPort()) +} + // Master returns a keypair of the network master account. func (i *Test) Master() *keypair.Full { return keypair.Master(NetworkPassphrase).(*keypair.Full) @@ -638,9 +654,43 @@ func (i *Test) CreateSignedTransaction( return tx, nil } +// CloseCoreLedgersUntilSequence will close ledgers until sequence. +// Note: because manualclose command doesn't block until ledger is actually +// closed, after running this method the last sequence can be higher than seq. +func (i *Test) CloseCoreLedgersUntilSequence(seq int) error { + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + info, err := i.cclient.Info(ctx) + if err != nil { + return err + } + + if info.Info.Ledger.Num >= seq { + return nil + } + + i.t.Logf( + "Currently at ledger: %d, want: %d.", + info.Info.Ledger.Num, + seq, + ) + + err = i.CloseCoreLedger() + if err != nil { + return err + } + // manualclose command in core doesn't block until ledger is actually + // closed. Let's give it time to close the ledger. + time.Sleep(200 * time.Millisecond) + } +} + +// CloseCoreLedgers will close one ledger. func (i *Test) CloseCoreLedger() error { ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() + i.t.Log("Closing one ledger manually...") return i.cclient.ManualClose(ctx) } @@ -686,6 +736,20 @@ func (i *Test) LogFailedTx(txResponse proto.Transaction, horizonResult error) { "Transaction doesn't have success code.") } +func (i *Test) RunHorizonCLICommand(cmd ...string) { + fullCmd := append([]string{"/stellar/horizon/bin/horizon"}, cmd...) + id, err := i.cli.ContainerExecCreate( + context.Background(), + i.container.ID, + types.ExecConfig{ + Cmd: fullCmd, + }, + ) + panicIf(err) + err = i.cli.ContainerExecStart(context.Background(), id.ID, types.ExecStartCheck{}) + panicIf(err) +} + // Cluttering code with if err != nil is absolute nonsense. func panicIf(err error) { if err != nil {