Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Panic on missing _ghc and _gho tables #772

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions go/base/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,20 @@ func (this *MigrationContext) ReadConfigFile() error {

return nil
}

func (this *MigrationContext) PanicAbortIfTableError(err error) {
if err == nil {
return
}
if strings.Contains(err.Error(), mysql.Error1146TableDoesntExist) || strings.Contains(err.Error(), mysql.Error1017CantFindFile) {
this.PanicAbortOnError(err)
}
// otherwise irrelevant error and we do not panic
}

func (this *MigrationContext) PanicAbortOnError(err error) {
if err == nil {
return
}
this.PanicAbort <- err
}
3 changes: 3 additions & 0 deletions go/logic/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ func (this *Applier) WriteChangelog(hint, value string) (string, error) {
sql.EscapeName(this.migrationContext.GetChangelogTableName()),
)
_, err := sqlutils.ExecNoPrepare(this.db, query, explicitId, hint, value)
this.migrationContext.PanicAbortIfTableError(err)
return hint, err
}

Expand Down Expand Up @@ -916,6 +917,8 @@ func (this *Applier) AtomicCutoverRename(sessionIdChan chan int64, tablesRenamed
)
log.Infof("Issuing and expecting this to block: %s", query)
if _, err := tx.Exec(query); err != nil {
this.migrationContext.PanicAbortIfTableError(err)

tablesRenamed <- err
return log.Errore(err)
}
Expand Down
17 changes: 10 additions & 7 deletions go/logic/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
// there's an error. Let's try again.
}
if len(notFatalHint) == 0 {
this.migrationContext.PanicAbort <- err
this.migrationContext.PanicAbortOnError(err)
}
return err
}
Expand Down Expand Up @@ -172,7 +172,7 @@ func (this *Migrator) retryOperationWithExponentialBackoff(operation func() erro
}
}
if len(notFatalHint) == 0 {
this.migrationContext.PanicAbort <- err
this.migrationContext.PanicAbortOnError(err)
}
return err
}
Expand All @@ -191,14 +191,14 @@ func (this *Migrator) executeAndThrottleOnError(operation func() error) (err err
// consumes and drops any further incoming events that may be left hanging.
func (this *Migrator) consumeRowCopyComplete() {
if err := <-this.rowCopyComplete; err != nil {
this.migrationContext.PanicAbort <- err
this.migrationContext.PanicAbortOnError(err)
}
atomic.StoreInt64(&this.rowCopyCompleteFlag, 1)
this.migrationContext.MarkRowCopyEndTime()
go func() {
for err := range this.rowCopyComplete {
if err != nil {
this.migrationContext.PanicAbort <- err
this.migrationContext.PanicAbortOnError(err)
}
}
}()
Expand Down Expand Up @@ -620,17 +620,20 @@ func (this *Migrator) atomicCutOver() (err error) {
tableUnlocked := make(chan error, 2)
go func() {
if err := this.applier.AtomicCutOverMagicLock(lockOriginalSessionIdChan, tableLocked, okToUnlockTable, tableUnlocked); err != nil {
this.migrationContext.PanicAbortIfTableError(err)
log.Errore(err)
}
}()
if err := <-tableLocked; err != nil {
this.migrationContext.PanicAbortIfTableError(err)
return log.Errore(err)
}
lockOriginalSessionId := <-lockOriginalSessionIdChan
log.Infof("Session locking original & magic tables is %+v", lockOriginalSessionId)
// At this point we know the original table is locked.
// We know any newly incoming DML on original table is blocked.
if err := this.waitForEventsUpToLock(); err != nil {
this.migrationContext.PanicAbortIfTableError(err)
return log.Errore(err)
}

Expand All @@ -644,6 +647,7 @@ func (this *Migrator) atomicCutOver() (err error) {
go func() {
if err := this.applier.AtomicCutoverRename(renameSessionIdChan, tablesRenamed); err != nil {
// Abort! Release the lock
this.migrationContext.PanicAbortIfTableError(err)
atomic.StoreInt64(&tableRenameKnownToHaveFailed, 1)
okToUnlockTable <- true
}
Expand Down Expand Up @@ -999,9 +1003,8 @@ func (this *Migrator) initiateStreaming() error {

go func() {
log.Debugf("Beginning streaming")
err := this.eventsStreamer.StreamEvents(this.canStopStreaming)
if err != nil {
this.migrationContext.PanicAbort <- err
if err := this.eventsStreamer.StreamEvents(this.canStopStreaming); err != nil {
this.migrationContext.PanicAbortOnError(err)
}
log.Debugf("Done streaming")
}()
Expand Down
2 changes: 1 addition & 1 deletion go/logic/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ help # This message
return NoPrintStatusRule, err
}
err := fmt.Errorf("User commanded 'panic'. The migration will be aborted without cleanup. Please drop the gh-ost tables before trying again.")
this.migrationContext.PanicAbort <- err
this.migrationContext.PanicAbortOnError(err)
return NoPrintStatusRule, err
}
default:
Expand Down
7 changes: 4 additions & 3 deletions go/logic/throttler.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ func (this *Throttler) collectControlReplicasLag() {
lagResult := &mysql.ReplicationLagResult{Key: connectionConfig.Key}
go func() {
lagResult.Lag, lagResult.Err = readReplicaLag(connectionConfig)
this.migrationContext.PanicAbortIfTableError(lagResult.Err)
lagResults <- lagResult
}()
}
Expand Down Expand Up @@ -317,7 +318,7 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
// Regardless of throttle, we take opportunity to check for panic-abort
if this.migrationContext.PanicFlagFile != "" {
if base.FileExists(this.migrationContext.PanicFlagFile) {
this.migrationContext.PanicAbort <- fmt.Errorf("Found panic-file %s. Aborting without cleanup", this.migrationContext.PanicFlagFile)
this.migrationContext.PanicAbortOnError(fmt.Errorf("Found panic-file %s. Aborting without cleanup", this.migrationContext.PanicFlagFile))
}
}

Expand All @@ -340,15 +341,15 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
}

if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
this.migrationContext.PanicAbortOnError(fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold))
}
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds > 0 {
log.Errorf("critical-load met once: %s=%d, >=%d. Will check again in %d millis", variableName, value, threshold, this.migrationContext.CriticalLoadIntervalMilliseconds)
go func() {
timer := time.NewTimer(time.Millisecond * time.Duration(this.migrationContext.CriticalLoadIntervalMilliseconds))
<-timer.C
if criticalLoadMetAgain, variableName, value, threshold, _ := this.criticalLoadIsMet(); criticalLoadMetAgain {
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met again after %d millis: %s=%d, >=%d", this.migrationContext.CriticalLoadIntervalMilliseconds, variableName, value, threshold)
this.migrationContext.PanicAbortOnError(fmt.Errorf("critical-load met again after %d millis: %s=%d, >=%d", this.migrationContext.CriticalLoadIntervalMilliseconds, variableName, value, threshold))
}
}()
}
Expand Down
5 changes: 5 additions & 0 deletions go/mysql/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ import (
"github.com/outbrain/golib/sqlutils"
)

const (
Error1017CantFindFile = "Error 1017:"
Error1146TableDoesntExist = "Error 1146:"
)

const MaxTableNameLength = 64
const MaxReplicationPasswordLength = 32

Expand Down
23 changes: 23 additions & 0 deletions localtests/fail-drop-ghc-table-no-read/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
drop table if exists gh_ost_test;
create table gh_ost_test (
id int auto_increment,
i int not null,
color varchar(32),
primary key(id)
) auto_increment=1;

insert into gh_ost_test values (null, 1, 'red');

drop event if exists gh_ost_test;
delimiter ;;
create event gh_ost_test
on schedule every 1 second
starts current_timestamp + interval 3 second
ends current_timestamp + interval 60 second
on completion not preserve
enable
do
begin
insert into gh_ost_test values (null, 1, 'blue');
drop table if exists _gh_ost_test_ghc;
end ;;
1 change: 1 addition & 0 deletions localtests/fail-drop-ghc-table-no-read/expect_failure
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Error 1146: Table 'test._gh_ost_test_ghc' doesn't exist
1 change: 1 addition & 0 deletions localtests/fail-drop-ghc-table-no-read/extra_args
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--throttle-query='select sleep(1)'
23 changes: 23 additions & 0 deletions localtests/fail-drop-ghc-table/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
drop table if exists gh_ost_test;
create table gh_ost_test (
id int auto_increment,
i int not null,
color varchar(32),
primary key(id)
) auto_increment=1;

insert into gh_ost_test values (null, 1, 'red');

drop event if exists gh_ost_test;
delimiter ;;
create event gh_ost_test
on schedule every 1 second
starts current_timestamp + interval 3 second
ends current_timestamp + interval 60 second
on completion not preserve
enable
do
begin
insert into gh_ost_test values (null, 1, 'blue');
drop table if exists _gh_ost_test_ghc;
end ;;
1 change: 1 addition & 0 deletions localtests/fail-drop-ghc-table/expect_failure
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Error 1146: Table 'test._gh_ost_test_ghc' doesn't exist
22 changes: 22 additions & 0 deletions localtests/fail-drop-gho-table-no-data/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
drop table if exists gh_ost_test;
create table gh_ost_test (
id int auto_increment,
i int not null,
color varchar(32),
primary key(id)
) auto_increment=1;

insert into gh_ost_test values (null, 1, 'blue');

drop event if exists gh_ost_test;
delimiter ;;
create event gh_ost_test
on schedule every 1 second
starts current_timestamp
ends current_timestamp + interval 60 second
on completion not preserve
enable
do
begin
drop table if exists _gh_ost_test_gho;
end ;;
1 change: 1 addition & 0 deletions localtests/fail-drop-gho-table-no-data/expect_failure
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Error 1146: Table 'test._gh_ost_test_gho' doesn't exist
1 change: 1 addition & 0 deletions localtests/fail-drop-gho-table-no-data/extra_args
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc'
22 changes: 22 additions & 0 deletions localtests/fail-drop-gho-table/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
drop table if exists gh_ost_test;
create table gh_ost_test (
id int auto_increment,
i int not null,
color varchar(32),
primary key(id)
) auto_increment=1;


drop event if exists gh_ost_test;
delimiter ;;
create event gh_ost_test
on schedule every 1 second
starts current_timestamp
ends current_timestamp + interval 60 second
on completion not preserve
enable
do
begin
insert into gh_ost_test values (null, 1, 'blue');
drop table if exists _gh_ost_test_gho;
end ;;
1 change: 1 addition & 0 deletions localtests/fail-drop-gho-table/expect_failure
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Error 1146: Table 'test._gh_ost_test_gho' doesn't exist