From 8864285ba0418749ee93d92bc02b39ad1fe67fa1 Mon Sep 17 00:00:00 2001 From: Urvi Date: Wed, 31 May 2023 20:52:34 -0700 Subject: [PATCH] services/horizon: Improve error handling for when stellar-core crashes --- ingest/ledgerbackend/captive_core_backend.go | 21 ++++----- .../captive_core_backend_test.go | 45 ++++++++++++++----- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/ingest/ledgerbackend/captive_core_backend.go b/ingest/ledgerbackend/captive_core_backend.go index deb618cf20..238d91a069 100644 --- a/ingest/ledgerbackend/captive_core_backend.go +++ b/ingest/ledgerbackend/captive_core_backend.go @@ -563,25 +563,26 @@ func (c *CaptiveStellarCore) handleMetaPipeResult(sequence uint32, result metaRe } func (c *CaptiveStellarCore) checkMetaPipeResult(result metaResult, ok bool) error { - // There are 3 types of errors we check for: + // There are 4 error scenarios we check for: // 1. User initiated shutdown by canceling the parent context or calling Close(). - // 2. The stellar core process exited unexpectedly. + // 2. The stellar core process exited unexpectedly with an error message. // 3. Some error was encountered while consuming the ledgers emitted by captive core (e.g. parsing invalid xdr) + // 4. The stellar core process exited unexpectedly without an error message if err := c.stellarCoreRunner.context().Err(); err != nil { // Case 1 - User initiated shutdown by canceling the parent context or calling Close() return err } if !ok || result.err != nil { - if result.err != nil { + exited, err := c.stellarCoreRunner.getProcessExitError() + if exited && err != nil { + // Case 2 - The stellar core process exited unexpectedly with an error message + return errors.Wrap(err, "stellar core exited unexpectedly") + } else if result.err != nil { // Case 3 - Some error was encountered while consuming the ledger stream emitted by captive core. return result.err - } else if exited, err := c.stellarCoreRunner.getProcessExitError(); exited { - // Case 2 - The stellar core process exited unexpectedly - if err == nil { - return errors.Errorf("stellar core exited unexpectedly") - } else { - return errors.Wrap(err, "stellar core exited unexpectedly") - } + } else if exited { + // case 4 - The stellar core process exited unexpectedly without an error message + return errors.Errorf("stellar core exited unexpectedly") } else if !ok { // This case should never happen because the ledger buffer channel can only be closed // if and only if the process exits or the context is canceled. diff --git a/ingest/ledgerbackend/captive_core_backend_test.go b/ingest/ledgerbackend/captive_core_backend_test.go index 7de69db927..25bad26d5f 100644 --- a/ingest/ledgerbackend/captive_core_backend_test.go +++ b/ingest/ledgerbackend/captive_core_backend_test.go @@ -899,6 +899,7 @@ func TestCaptiveGetLedger_ErrReadingMetaResult(t *testing.T) { mockRunner.On("close").Return(nil).Run(func(args mock.Arguments) { cancel() }).Once() + mockRunner.On("getProcessExitError").Return(false, nil) // even if the request to fetch the latest checkpoint succeeds, we should fail at creating the subprocess mockArchive := &historyarchive.MockArchive{} @@ -1084,17 +1085,19 @@ func TestGetLedgerBoundsCheck(t *testing.T) { mockRunner.AssertExpectations(t) } -func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) { +type GetLedgerTerminatedTestCase struct { + name string + ctx context.Context + ledgers []metaResult + processExited bool + processExitedError error + expectedError string +} + +func CaptiveGetLedgerTerminatedUnexpectedlyTestCases() []GetLedgerTerminatedTestCase { ledger64 := buildLedgerCloseMeta(testLedgerHeader{sequence: uint32(64)}) - for _, testCase := range []struct { - name string - ctx context.Context - ledgers []metaResult - processExited bool - processExitedError error - expectedError string - }{ + return []GetLedgerTerminatedTestCase{ { "stellar core exited unexpectedly without error", context.Background(), @@ -1135,7 +1138,29 @@ func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) { nil, "meta pipe closed unexpectedly", }, - } { + { + "Parser error while reading from the pipe resulting in stellar-core exit", + context.Background(), + []metaResult{{LedgerCloseMeta: &ledger64}, + {LedgerCloseMeta: nil, err: errors.New("Parser error")}}, + true, + nil, + "Parser error", + }, + { + "stellar core exited unexpectedly with an error resulting in meta pipe closed", + context.Background(), + []metaResult{{LedgerCloseMeta: &ledger64}, + {LedgerCloseMeta: &ledger64, err: errors.New("EOF while decoding")}}, + true, + fmt.Errorf("signal kill"), + "stellar core exited unexpectedly: signal kill", + }, + } +} + +func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) { + for _, testCase := range CaptiveGetLedgerTerminatedUnexpectedlyTestCases() { t.Run(testCase.name, func(t *testing.T) { metaChan := make(chan metaResult, 100)