From 21a251b620406a1471764b884edcfa9568e67c50 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Thu, 1 Aug 2019 15:11:34 +0300 Subject: [PATCH 1/4] Panic on missing _ghc and _gho tables --- go/base/context.go | 17 +++++++++++++++++ go/logic/applier.go | 2 ++ go/logic/migrator.go | 17 ++++++++++------- go/logic/server.go | 2 +- go/logic/throttler.go | 7 ++++--- go/mysql/utils.go | 5 +++++ 6 files changed, 39 insertions(+), 11 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index f12cd43c4..d52327484 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -736,3 +736,20 @@ func (this *MigrationContext) ReadConfigFile() error { return nil } + +func (this *MigrationContext) PanicAbortIfTableError(err error) { + if err == nil { + return + } + if strings.Contains(err.Error(), mysql.Error1146TableDoesntExist) || strings.Contains(err.Error(), mysql.Error1017CantFindFile) { + this.PanicAbortOnError(err) + } + // otherwise irrelevant error and we do not panic +} + +func (this *MigrationContext) PanicAbortOnError(err error) { + if err == nil { + return + } + this.PanicAbort <- err +} diff --git a/go/logic/applier.go b/go/logic/applier.go index 5fb795bac..bc4824248 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -916,6 +916,8 @@ func (this *Applier) AtomicCutoverRename(sessionIdChan chan int64, tablesRenamed ) log.Infof("Issuing and expecting this to block: %s", query) if _, err := tx.Exec(query); err != nil { + this.migrationContext.PanicAbortIfTableError(err) + tablesRenamed <- err return log.Errore(err) } diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 86de5ac15..533ecf147 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -144,7 +144,7 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo // there's an error. Let's try again. } if len(notFatalHint) == 0 { - this.migrationContext.PanicAbort <- err + this.migrationContext.PanicAbortOnError(err) } return err } @@ -172,7 +172,7 @@ func (this *Migrator) retryOperationWithExponentialBackoff(operation func() erro } } if len(notFatalHint) == 0 { - this.migrationContext.PanicAbort <- err + this.migrationContext.PanicAbortOnError(err) } return err } @@ -191,14 +191,14 @@ func (this *Migrator) executeAndThrottleOnError(operation func() error) (err err // consumes and drops any further incoming events that may be left hanging. func (this *Migrator) consumeRowCopyComplete() { if err := <-this.rowCopyComplete; err != nil { - this.migrationContext.PanicAbort <- err + this.migrationContext.PanicAbortOnError(err) } atomic.StoreInt64(&this.rowCopyCompleteFlag, 1) this.migrationContext.MarkRowCopyEndTime() go func() { for err := range this.rowCopyComplete { if err != nil { - this.migrationContext.PanicAbort <- err + this.migrationContext.PanicAbortOnError(err) } } }() @@ -620,10 +620,12 @@ func (this *Migrator) atomicCutOver() (err error) { tableUnlocked := make(chan error, 2) go func() { if err := this.applier.AtomicCutOverMagicLock(lockOriginalSessionIdChan, tableLocked, okToUnlockTable, tableUnlocked); err != nil { + this.migrationContext.PanicAbortIfTableError(err) log.Errore(err) } }() if err := <-tableLocked; err != nil { + this.migrationContext.PanicAbortIfTableError(err) return log.Errore(err) } lockOriginalSessionId := <-lockOriginalSessionIdChan @@ -631,6 +633,7 @@ func (this *Migrator) atomicCutOver() (err error) { // At this point we know the original table is locked. // We know any newly incoming DML on original table is blocked. if err := this.waitForEventsUpToLock(); err != nil { + this.migrationContext.PanicAbortIfTableError(err) return log.Errore(err) } @@ -644,6 +647,7 @@ func (this *Migrator) atomicCutOver() (err error) { go func() { if err := this.applier.AtomicCutoverRename(renameSessionIdChan, tablesRenamed); err != nil { // Abort! Release the lock + this.migrationContext.PanicAbortIfTableError(err) atomic.StoreInt64(&tableRenameKnownToHaveFailed, 1) okToUnlockTable <- true } @@ -996,9 +1000,8 @@ func (this *Migrator) initiateStreaming() error { go func() { log.Debugf("Beginning streaming") - err := this.eventsStreamer.StreamEvents(this.canStopStreaming) - if err != nil { - this.migrationContext.PanicAbort <- err + if err := this.eventsStreamer.StreamEvents(this.canStopStreaming); err != nil { + this.migrationContext.PanicAbortOnError(err) } log.Debugf("Done streaming") }() diff --git a/go/logic/server.go b/go/logic/server.go index 774c4ab21..7c50d80b6 100644 --- a/go/logic/server.go +++ b/go/logic/server.go @@ -342,7 +342,7 @@ help # This message return NoPrintStatusRule, err } err := fmt.Errorf("User commanded 'panic'. I will now panic, without cleanup. PANIC!") - this.migrationContext.PanicAbort <- err + this.migrationContext.PanicAbortOnError(err) return NoPrintStatusRule, err } default: diff --git a/go/logic/throttler.go b/go/logic/throttler.go index 6f9f4bba7..7e0df4f19 100644 --- a/go/logic/throttler.go +++ b/go/logic/throttler.go @@ -209,6 +209,7 @@ func (this *Throttler) collectControlReplicasLag() { lagResult := &mysql.ReplicationLagResult{Key: connectionConfig.Key} go func() { lagResult.Lag, lagResult.Err = readReplicaLag(connectionConfig) + this.migrationContext.PanicAbortIfTableError(lagResult.Err) lagResults <- lagResult }() } @@ -317,7 +318,7 @@ func (this *Throttler) collectGeneralThrottleMetrics() error { // Regardless of throttle, we take opportunity to check for panic-abort if this.migrationContext.PanicFlagFile != "" { if base.FileExists(this.migrationContext.PanicFlagFile) { - this.migrationContext.PanicAbort <- fmt.Errorf("Found panic-file %s. Aborting without cleanup", this.migrationContext.PanicFlagFile) + this.migrationContext.PanicAbortOnError(fmt.Errorf("Found panic-file %s. Aborting without cleanup", this.migrationContext.PanicFlagFile)) } } @@ -340,7 +341,7 @@ func (this *Throttler) collectGeneralThrottleMetrics() error { } if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 { - this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold) + this.migrationContext.PanicAbortOnError(fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)) } if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds > 0 { log.Errorf("critical-load met once: %s=%d, >=%d. Will check again in %d millis", variableName, value, threshold, this.migrationContext.CriticalLoadIntervalMilliseconds) @@ -348,7 +349,7 @@ func (this *Throttler) collectGeneralThrottleMetrics() error { timer := time.NewTimer(time.Millisecond * time.Duration(this.migrationContext.CriticalLoadIntervalMilliseconds)) <-timer.C if criticalLoadMetAgain, variableName, value, threshold, _ := this.criticalLoadIsMet(); criticalLoadMetAgain { - this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met again after %d millis: %s=%d, >=%d", this.migrationContext.CriticalLoadIntervalMilliseconds, variableName, value, threshold) + this.migrationContext.PanicAbortOnError(fmt.Errorf("critical-load met again after %d millis: %s=%d, >=%d", this.migrationContext.CriticalLoadIntervalMilliseconds, variableName, value, threshold)) } }() } diff --git a/go/mysql/utils.go b/go/mysql/utils.go index 17bb5fc32..a14eb948a 100644 --- a/go/mysql/utils.go +++ b/go/mysql/utils.go @@ -18,6 +18,11 @@ import ( "github.com/outbrain/golib/sqlutils" ) +const ( + Error1017CantFindFile = "Error 1017:" + Error1146TableDoesntExist = "Error 1146:" +) + const MaxTableNameLength = 64 const MaxReplicationPasswordLength = 32 From e2482d4e2ee713ec1176ec8ab4f826bec6dfb766 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Thu, 1 Aug 2019 15:13:08 +0300 Subject: [PATCH 2/4] adding tests --- localtests/fail-drop-ghc-table/create.sql | 23 +++++++++++++++++++ localtests/fail-drop-ghc-table/expect_failure | 1 + .../fail-drop-gho-table-no-data/create.sql | 22 ++++++++++++++++++ .../expect_failure | 1 + .../fail-drop-gho-table-no-data/extra_args | 1 + localtests/fail-drop-gho-table/create.sql | 22 ++++++++++++++++++ localtests/fail-drop-gho-table/expect_failure | 1 + 7 files changed, 71 insertions(+) create mode 100644 localtests/fail-drop-ghc-table/create.sql create mode 100644 localtests/fail-drop-ghc-table/expect_failure create mode 100644 localtests/fail-drop-gho-table-no-data/create.sql create mode 100644 localtests/fail-drop-gho-table-no-data/expect_failure create mode 100644 localtests/fail-drop-gho-table-no-data/extra_args create mode 100644 localtests/fail-drop-gho-table/create.sql create mode 100644 localtests/fail-drop-gho-table/expect_failure diff --git a/localtests/fail-drop-ghc-table/create.sql b/localtests/fail-drop-ghc-table/create.sql new file mode 100644 index 000000000..1973ec1aa --- /dev/null +++ b/localtests/fail-drop-ghc-table/create.sql @@ -0,0 +1,23 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + i int not null, + color varchar(32), + primary key(id) +) auto_increment=1; + +insert into gh_ost_test values (null, 1, 'red'); + +drop event if exists gh_ost_test; +delimiter ;; +create event gh_ost_test + on schedule every 1 second + starts current_timestamp + interval 3 second + ends current_timestamp + interval 60 second + on completion not preserve + enable + do +begin + insert into gh_ost_test values (null, 1, 'blue'); + drop table if exists _gh_ost_test_ghc; +end ;; diff --git a/localtests/fail-drop-ghc-table/expect_failure b/localtests/fail-drop-ghc-table/expect_failure new file mode 100644 index 000000000..fd2f6bf65 --- /dev/null +++ b/localtests/fail-drop-ghc-table/expect_failure @@ -0,0 +1 @@ +Error 1146: Table 'test._gh_ost_test_ghc' doesn't exist diff --git a/localtests/fail-drop-gho-table-no-data/create.sql b/localtests/fail-drop-gho-table-no-data/create.sql new file mode 100644 index 000000000..f3413c3a8 --- /dev/null +++ b/localtests/fail-drop-gho-table-no-data/create.sql @@ -0,0 +1,22 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + i int not null, + color varchar(32), + primary key(id) +) auto_increment=1; + +insert into gh_ost_test values (null, 1, 'blue'); + +drop event if exists gh_ost_test; +delimiter ;; +create event gh_ost_test + on schedule every 1 second + starts current_timestamp + ends current_timestamp + interval 60 second + on completion not preserve + enable + do +begin + drop table if exists _gh_ost_test_gho; +end ;; diff --git a/localtests/fail-drop-gho-table-no-data/expect_failure b/localtests/fail-drop-gho-table-no-data/expect_failure new file mode 100644 index 000000000..1ba887927 --- /dev/null +++ b/localtests/fail-drop-gho-table-no-data/expect_failure @@ -0,0 +1 @@ +Error 1146: Table 'test._gh_ost_test_gho' doesn't exist diff --git a/localtests/fail-drop-gho-table-no-data/extra_args b/localtests/fail-drop-gho-table-no-data/extra_args new file mode 100644 index 000000000..8b06fd447 --- /dev/null +++ b/localtests/fail-drop-gho-table-no-data/extra_args @@ -0,0 +1 @@ +--throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc' diff --git a/localtests/fail-drop-gho-table/create.sql b/localtests/fail-drop-gho-table/create.sql new file mode 100644 index 000000000..327f4d47b --- /dev/null +++ b/localtests/fail-drop-gho-table/create.sql @@ -0,0 +1,22 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + i int not null, + color varchar(32), + primary key(id) +) auto_increment=1; + + +drop event if exists gh_ost_test; +delimiter ;; +create event gh_ost_test + on schedule every 1 second + starts current_timestamp + ends current_timestamp + interval 60 second + on completion not preserve + enable + do +begin + insert into gh_ost_test values (null, 1, 'blue'); + drop table if exists _gh_ost_test_gho; +end ;; diff --git a/localtests/fail-drop-gho-table/expect_failure b/localtests/fail-drop-gho-table/expect_failure new file mode 100644 index 000000000..1ba887927 --- /dev/null +++ b/localtests/fail-drop-gho-table/expect_failure @@ -0,0 +1 @@ +Error 1146: Table 'test._gh_ost_test_gho' doesn't exist From 1a45b5fffc77134dcdbb8e17eed7f2a5ad9dedc0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Thu, 1 Aug 2019 15:19:38 +0300 Subject: [PATCH 3/4] bailing out on missing _ghc table on failure to writing to it --- .../fail-drop-ghc-table-no-read/create.sql | 23 +++++++++++++++++++ .../expect_failure | 1 + .../fail-drop-ghc-table-no-read/extra_args | 1 + 3 files changed, 25 insertions(+) create mode 100644 localtests/fail-drop-ghc-table-no-read/create.sql create mode 100644 localtests/fail-drop-ghc-table-no-read/expect_failure create mode 100644 localtests/fail-drop-ghc-table-no-read/extra_args diff --git a/localtests/fail-drop-ghc-table-no-read/create.sql b/localtests/fail-drop-ghc-table-no-read/create.sql new file mode 100644 index 000000000..1973ec1aa --- /dev/null +++ b/localtests/fail-drop-ghc-table-no-read/create.sql @@ -0,0 +1,23 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + i int not null, + color varchar(32), + primary key(id) +) auto_increment=1; + +insert into gh_ost_test values (null, 1, 'red'); + +drop event if exists gh_ost_test; +delimiter ;; +create event gh_ost_test + on schedule every 1 second + starts current_timestamp + interval 3 second + ends current_timestamp + interval 60 second + on completion not preserve + enable + do +begin + insert into gh_ost_test values (null, 1, 'blue'); + drop table if exists _gh_ost_test_ghc; +end ;; diff --git a/localtests/fail-drop-ghc-table-no-read/expect_failure b/localtests/fail-drop-ghc-table-no-read/expect_failure new file mode 100644 index 000000000..fd2f6bf65 --- /dev/null +++ b/localtests/fail-drop-ghc-table-no-read/expect_failure @@ -0,0 +1 @@ +Error 1146: Table 'test._gh_ost_test_ghc' doesn't exist diff --git a/localtests/fail-drop-ghc-table-no-read/extra_args b/localtests/fail-drop-ghc-table-no-read/extra_args new file mode 100644 index 000000000..ea18160c0 --- /dev/null +++ b/localtests/fail-drop-ghc-table-no-read/extra_args @@ -0,0 +1 @@ +--throttle-query='select sleep(1)' From 9aef2707ea160d7039f29809a41085887106d739 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Thu, 1 Aug 2019 15:19:55 +0300 Subject: [PATCH 4/4] bailing out on missing _ghc table on failure to writing to it --- go/logic/applier.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/logic/applier.go b/go/logic/applier.go index bc4824248..69fff830f 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -290,6 +290,7 @@ func (this *Applier) WriteChangelog(hint, value string) (string, error) { sql.EscapeName(this.migrationContext.GetChangelogTableName()), ) _, err := sqlutils.ExecNoPrepare(this.db, query, explicitId, hint, value) + this.migrationContext.PanicAbortIfTableError(err) return hint, err }