Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle MySQL handler error codes #17252

Merged
merged 4 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 190 additions & 0 deletions go/mysql/sqlerror/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ const (
ERBlobKeyWithoutLength = ErrorCode(1170)
ERPrimaryCantHaveNull = ErrorCode(1171)
ERTooManyRows = ErrorCode(1172)
ERErrorDuringCommit = ErrorCode(1180)
ERLockOrActiveTransaction = ErrorCode(1192)
ERUnknownSystemVariable = ErrorCode(1193)
ERSetConstantsOnly = ErrorCode(1204)
Expand Down Expand Up @@ -301,6 +302,195 @@ const (
ERServerIsntAvailable = ErrorCode(3168)
)

// HandlerErrorCode identifies an error thrown by the handler; such errors are
// then embedded in other errors.
// See https://github.com/mysql/mysql-server/blob/trunk/include/my_base.h
type HandlerErrorCode uint16

// ToString renders the handler error code as a base-10 string.
func (e HandlerErrorCode) ToString() string {
	// A uint16 always fits in an int, so the conversion is lossless.
	return strconv.Itoa(int(e))
}

// Handler error codes, one constant per numeric code in the range 120-209.
// NOTE(review): values are presumed to mirror the definitions in MySQL's
// include/my_base.h — verify against upstream when adding or changing codes.
// NOTE(review): no constant is defined for code 129 — confirm this matches upstream.
const (
// Didn't find key on read or update
HaErrKeyNotFound = HandlerErrorCode(120)
// Duplicate key on write
HaErrFoundDuppKey = HandlerErrorCode(121)
// Internal error
HaErrInternalError = HandlerErrorCode(122)
// Update which is recoverable
HaErrRecordChanged = HandlerErrorCode(123)
// Wrong index given to function
HaErrWrongIndex = HandlerErrorCode(124)
// Transaction has been rolled back
HaErrRolledBack = HandlerErrorCode(125)
// Indexfile is crashed
HaErrCrashed = HandlerErrorCode(126)
// Record-file is crashed
HaErrWrongInRecord = HandlerErrorCode(127)
// Out of memory
HaErrOutOfMem = HandlerErrorCode(128)
// not a MYI file - no signature
HaErrNotATable = HandlerErrorCode(130)
// Command not supported
HaErrWrongCommand = HandlerErrorCode(131)
// old database file
HaErrOldFile = HandlerErrorCode(132)
// No record read in update()
HaErrNoActiveRecord = HandlerErrorCode(133)
// A record is not there
HaErrRecordDeleted = HandlerErrorCode(134)
// No more room in file
HaErrRecordFileFull = HandlerErrorCode(135)
// No more room in file
HaErrIndexFileFull = HandlerErrorCode(136)
// end in next/prev/first/last
HaErrEndOfFile = HandlerErrorCode(137)
// unsupported extension used
HaErrUnsupported = HandlerErrorCode(138)
// Too big row
HaErrTooBigRow = HandlerErrorCode(139)
// Wrong create option
HaWrongCreateOption = HandlerErrorCode(140)
// Duplicate unique on write
HaErrFoundDuppUnique = HandlerErrorCode(141)
// Can't open charset
HaErrUnknownCharset = HandlerErrorCode(142)
// conflicting tables in MERGE
HaErrWrongMrgTableDef = HandlerErrorCode(143)
// Last (automatic?) repair failed
HaErrCrashedOnRepair = HandlerErrorCode(144)
// Table must be repaired
HaErrCrashedOnUsage = HandlerErrorCode(145)
// Lock wait timeout
HaErrLockWaitTimeout = HandlerErrorCode(146)
// Lock table is full
HaErrLockTableFull = HandlerErrorCode(147)
// Updates not allowed
HaErrReadOnlyTransaction = HandlerErrorCode(148)
// Deadlock found when trying to get lock
HaErrLockDeadlock = HandlerErrorCode(149)
// Cannot add a foreign key constr.
HaErrCannotAddForeign = HandlerErrorCode(150)
// Cannot add a child row
HaErrNoReferencedRow = HandlerErrorCode(151)
// Cannot delete a parent row
HaErrRowIsReferenced = HandlerErrorCode(152)
// No savepoint with that name
HaErrNoSavepoint = HandlerErrorCode(153)
// Non unique key block size
HaErrNonUniqueBlockSize = HandlerErrorCode(154)
// The table does not exist in engine
HaErrNoSuchTable = HandlerErrorCode(155)
// The table existed in storage engine
HaErrTableExist = HandlerErrorCode(156)
// Could not connect to storage engine
HaErrNoConnection = HandlerErrorCode(157)
// NULLs are not supported in spatial index
HaErrNullInSpatial = HandlerErrorCode(158)
// The table changed in storage engine
HaErrTableDefChanged = HandlerErrorCode(159)
// There's no partition in table for given value
HaErrNoPartitionFound = HandlerErrorCode(160)
// Row-based binlogging of row failed
HaErrRbrLoggingFailed = HandlerErrorCode(161)
// Index needed in foreign key constraint
HaErrDropIndexFk = HandlerErrorCode(162)
// Upholding foreign key constraints would lead to a duplicate key error in some other table.
HaErrForeignDuplicateKey = HandlerErrorCode(163)
// The table needs to be upgraded
HaErrTableNeedsUpgrade = HandlerErrorCode(164)
// The table is not writable
HaErrTableReadonly = HandlerErrorCode(165)
// Failed to get next autoinc value
HaErrAutoincReadFailed = HandlerErrorCode(166)
// Failed to set row autoinc value
HaErrAutoincErange = HandlerErrorCode(167)
// Generic error
HaErrGeneric = HandlerErrorCode(168)
// row not actually updated: new values same as the old values
HaErrRecordIsTheSame = HandlerErrorCode(169)
// It is not possible to log this statement
HaErrLoggingImpossible = HandlerErrorCode(170)
// The event was corrupt, leading to illegal data being read
HaErrCorruptEvent = HandlerErrorCode(171)
// New file format
HaErrNewFile = HandlerErrorCode(172)
// The event could not be processed; no other handler error happened
HaErrRowsEventApply = HandlerErrorCode(173)
// Error during initialization
HaErrInitialization = HandlerErrorCode(174)
// File too short
HaErrFileTooShort = HandlerErrorCode(175)
// Wrong CRC on page
HaErrWrongCrc = HandlerErrorCode(176)
// Too many active concurrent transactions
HaErrTooManyConcurrentTrxs = HandlerErrorCode(177)
// There's no explicitly listed partition in table for the given value
HaErrNotInLockPartitions = HandlerErrorCode(178)
// Index column length exceeds limit
HaErrIndexColTooLong = HandlerErrorCode(179)
// InnoDB index corrupted
HaErrIndexCorrupt = HandlerErrorCode(180)
// Undo log record too big
HaErrUndoRecTooBig = HandlerErrorCode(181)
// Invalid InnoDB Doc ID
HaFtsInvalidDocid = HandlerErrorCode(182)
// Table being used in foreign key check
HaErrTableInFkCheck = HandlerErrorCode(183)
// The tablespace existed in storage engine
HaErrTablespaceExists = HandlerErrorCode(184)
// Table has too many columns
HaErrTooManyFields = HandlerErrorCode(185)
// Row in wrong partition
HaErrRowInWrongPartition = HandlerErrorCode(186)
// InnoDB is in read only mode.
HaErrInnodbReadOnly = HandlerErrorCode(187)
// FTS query exceeds result cache limit
HaErrFtsExceedResultCacheLimit = HandlerErrorCode(188)
// Temporary file write failure
HaErrTempFileWriteFailure = HandlerErrorCode(189)
// Innodb is in force recovery mode
HaErrInnodbForcedRecovery = HandlerErrorCode(190)
// Too many words in a phrase
HaErrFtsTooManyWordsInPhrase = HandlerErrorCode(191)
// FK cascade depth exceeded
HaErrFkDepthExceeded = HandlerErrorCode(192)
// Option Missing during Create
HaMissingCreateOption = HandlerErrorCode(193)
// Out of memory in storage engine
HaErrSeOutOfMemory = HandlerErrorCode(194)
// Table/Clustered index is corrupted.
HaErrTableCorrupt = HandlerErrorCode(195)
// The query was interrupted
HaErrQueryInterrupted = HandlerErrorCode(196)
// Missing Tablespace
HaErrTablespaceMissing = HandlerErrorCode(197)
// Tablespace is not empty
HaErrTablespaceIsNotEmpty = HandlerErrorCode(198)
// Invalid Filename
HaErrWrongFileName = HandlerErrorCode(199)
// Operation is not allowed
HaErrNotAllowedCommand = HandlerErrorCode(200)
// Compute generated column value failed
HaErrComputeFailed = HandlerErrorCode(201)
// Table's row format has changed in the storage engine. Information in the data-dictionary needs to be updated.
HaErrRowFormatChanged = HandlerErrorCode(202)
// Don't wait for record lock
HaErrNoWaitLock = HandlerErrorCode(203)
// No more room in disk
HaErrDiskFullNowait = HandlerErrorCode(204)
// No session temporary space available
HaErrNoSessionTemp = HandlerErrorCode(205)
// Wrong or Invalid table name
HaErrWrongTableName = HandlerErrorCode(206)
// Path is too long for the OS
HaErrTooLongPath = HandlerErrorCode(207)
// Histogram sampling initialization failed
HaErrSamplingInitFailed = HandlerErrorCode(208)
// Too many sub-expression in search string
HaErrFtsTooManyNestedExp = HandlerErrorCode(209)
)

// Sql states for errors.
// Originally found in include/mysql/sql_state.h
const (
Expand Down
12 changes: 12 additions & 0 deletions go/mysql/sqlerror/sql_error.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ func NewSQLError(number ErrorCode, sqlState string, msg string) *SQLError {
}
}

// handlerErrExtract matches messages of the form
// "Got error <code> - <text> from storage engine", "... during COMMIT" or
// "... during ROLLBACK", capturing the numeric code as the first group.
var handlerErrExtract = regexp.MustCompile(`Got error ([0-9]*) [-] .* (from storage engine|during COMMIT|during ROLLBACK)`)

// HaErrorCode extracts the handler error code embedded in the error message.
// It returns 0 when the message does not match the expected pattern, or when
// the captured code is empty or does not fit in 16 bits.
func (se *SQLError) HaErrorCode() HandlerErrorCode {
	match := handlerErrExtract.FindStringSubmatch(se.Message)
	// match[1] is read below, so require at least two elements (the full
	// match plus the first capture group).
	if len(match) < 2 {
		return 0
	}
	code, err := strconv.ParseUint(match[1], 10, 16)
	if err != nil {
		return 0
	}
	return HandlerErrorCode(code)
}

// Error implements the error interface
func (se *SQLError) Error() string {
var buf strings.Builder
Expand Down
21 changes: 21 additions & 0 deletions go/mysql/sqlerror/sql_error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func TestNewSQLErrorFromError(t *testing.T) {
var tCases = []struct {
err error
num ErrorCode
ha HandlerErrorCode
ss string
}{
{
Expand Down Expand Up @@ -179,6 +180,24 @@ func TestNewSQLErrorFromError(t *testing.T) {
num: ERDupEntry,
ss: SSConstraintViolation,
},
{
err: fmt.Errorf("ERROR HY000: Got error 204 - 'No more room in disk' during COMMIT"),
num: ERUnknownError,
ss: SSUnknownSQLState,
ha: HaErrDiskFullNowait,
},
{
err: fmt.Errorf("COMMIT failed w/ error: Got error 204 - 'No more room in disk' during COMMIT (errno 1180) (sqlstate HY000) during query: commit"),
num: ERErrorDuringCommit,
ss: SSUnknownSQLState,
ha: HaErrDiskFullNowait,
},
{
err: fmt.Errorf("COMMIT failed w/ error: Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT (errno 1180) (sqlstate HY000) during query: commit"),
num: ERErrorDuringCommit,
ss: SSUnknownSQLState,
ha: HaErrLockDeadlock,
},
}

for _, tc := range tCases {
Expand All @@ -187,6 +206,8 @@ func TestNewSQLErrorFromError(t *testing.T) {
require.ErrorAs(t, NewSQLErrorFromError(tc.err), &err)
assert.Equal(t, tc.num, err.Number())
assert.Equal(t, tc.ss, err.SQLState())
ha := err.HaErrorCode()
assert.Equal(t, tc.ha, ha)
})
}
}
18 changes: 18 additions & 0 deletions go/vt/vttablet/tabletmanager/vreplication/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,24 @@ func isUnrecoverableError(err error) bool {
sqlerror.ERWrongValueCountOnRow:
log.Errorf("Got unrecoverable error: %v", sqlErr)
return true
case sqlerror.ERErrorDuringCommit:
switch sqlErr.HaErrorCode() {
case
0, // Not really a HA error.
sqlerror.HaErrLockDeadlock,
sqlerror.HaErrLockTableFull,
sqlerror.HaErrLockWaitTimeout,
sqlerror.HaErrNotInLockPartitions,
sqlerror.HaErrQueryInterrupted,
sqlerror.HaErrRolledBack,
sqlerror.HaErrTooManyConcurrentTrxs,
sqlerror.HaErrUndoRecTooBig:
// These are recoverable errors.
return false
default:
log.Errorf("Got unrecoverable error: %v", sqlErr)
return true
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shlomi-noach Isn't it easier to invert this and explicitly list only the error codes that are recoverable? I think that's also more robust: any new error codes added in the future would then be assumed to be not recoverable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ambivalent. On one hand, until today I have never encountered this error in Vitess. On the other hand, I'm afraid to introduce new (so called "unaccounted for") failure points.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd lean towards treating unknown things as not recoverable / not retryable. That seems less risky than continuing to hammer on something that is an unexpected / unknown failure.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We now opt-in for recoverable errors.

}
return false
}
10 changes: 10 additions & 0 deletions go/vt/vttablet/tabletmanager/vreplication/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ func TestIsUnrecoverableError(t *testing.T) {
err: sqlerror.NewSQLError(sqlerror.ERDataOutOfRange, "data out of range", "test"),
expected: true,
},
{
name: "SQL error with HaErrDiskFullNowait error",
err: sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 204 - 'No more room in disk' during COMMIT"),
expected: true,
},
{
name: "SQL error with HaErrLockDeadlock error",
err: sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT"),
expected: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
Expand Down
Loading