Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services/horizon: Improve error handling for when stellar-core crashes #4893

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions ingest/ledgerbackend/captive_core_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,25 +563,26 @@ func (c *CaptiveStellarCore) handleMetaPipeResult(sequence uint32, result metaRe
}

func (c *CaptiveStellarCore) checkMetaPipeResult(result metaResult, ok bool) error {
// There are 3 types of errors we check for:
// There are 4 error scenarios we check for:
// 1. User initiated shutdown by canceling the parent context or calling Close().
// 2. The stellar core process exited unexpectedly.
// 2. The stellar core process exited unexpectedly with an error message.
// 3. Some error was encountered while consuming the ledgers emitted by captive core (e.g. parsing invalid xdr)
// 4. The stellar core process exited unexpectedly without an error message
if err := c.stellarCoreRunner.context().Err(); err != nil {
// Case 1 - User initiated shutdown by canceling the parent context or calling Close()
return err
}
if !ok || result.err != nil {
if result.err != nil {
exited, err := c.stellarCoreRunner.getProcessExitError()
if exited && err != nil {
// Case 2 - The stellar core process exited unexpectedly with an error message
return errors.Wrap(err, "stellar core exited unexpectedly")
} else if result.err != nil {
// Case 3 - Some error was encountered while consuming the ledger stream emitted by captive core.
return result.err
} else if exited, err := c.stellarCoreRunner.getProcessExitError(); exited {
// Case 2 - The stellar core process exited unexpectedly
if err == nil {
return errors.Errorf("stellar core exited unexpectedly")
} else {
return errors.Wrap(err, "stellar core exited unexpectedly")
}
} else if exited {
// case 4 - The stellar core process exited unexpectedly without an error message
return errors.Errorf("stellar core exited unexpectedly")
} else if !ok {
// This case should never happen because the ledger buffer channel can only be closed
// if and only if the process exits or the context is canceled.
Expand Down
45 changes: 35 additions & 10 deletions ingest/ledgerbackend/captive_core_backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ func TestCaptiveGetLedger_ErrReadingMetaResult(t *testing.T) {
mockRunner.On("close").Return(nil).Run(func(args mock.Arguments) {
cancel()
}).Once()
mockRunner.On("getProcessExitError").Return(false, nil)

// even if the request to fetch the latest checkpoint succeeds, we should fail at creating the subprocess
mockArchive := &historyarchive.MockArchive{}
Expand Down Expand Up @@ -1084,17 +1085,19 @@ func TestGetLedgerBoundsCheck(t *testing.T) {
mockRunner.AssertExpectations(t)
}

func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) {
type GetLedgerTerminatedTestCase struct {
name string
ctx context.Context
ledgers []metaResult
processExited bool
processExitedError error
expectedError string
}

func CaptiveGetLedgerTerminatedUnexpectedlyTestCases() []GetLedgerTerminatedTestCase {
ledger64 := buildLedgerCloseMeta(testLedgerHeader{sequence: uint32(64)})

for _, testCase := range []struct {
name string
ctx context.Context
ledgers []metaResult
processExited bool
processExitedError error
expectedError string
}{
return []GetLedgerTerminatedTestCase{
{
"stellar core exited unexpectedly without error",
context.Background(),
Expand Down Expand Up @@ -1135,7 +1138,29 @@ func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) {
nil,
"meta pipe closed unexpectedly",
},
} {
{
"Parser error while reading from the pipe resulting in stellar-core exit",
context.Background(),
[]metaResult{{LedgerCloseMeta: &ledger64},
{LedgerCloseMeta: nil, err: errors.New("Parser error")}},
true,
nil,
"Parser error",
},
{
"stellar core exited unexpectedly with an error resulting in meta pipe closed",
context.Background(),
[]metaResult{{LedgerCloseMeta: &ledger64},
{LedgerCloseMeta: &ledger64, err: errors.New("EOF while decoding")}},
true,
fmt.Errorf("signal kill"),
"stellar core exited unexpectedly: signal kill",
},
}
}

func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) {
for _, testCase := range CaptiveGetLedgerTerminatedUnexpectedlyTestCases() {
t.Run(testCase.name, func(t *testing.T) {
metaChan := make(chan metaResult, 100)

Expand Down