From 76cf5b620f65ccb99b792eb294ae16646ce219ba Mon Sep 17 00:00:00 2001 From: Xuecheng Zhang Date: Wed, 17 Jun 2020 13:48:25 +0800 Subject: [PATCH 1/2] fix(load): stop goroutines after restore returned (#744) --- dm/worker/dm-worker.toml | 4 +--- dm/worker/server_test.go | 4 ++-- dm/worker/task_checker_test.go | 2 +- dm/worker/worker_test.go | 2 +- loader/loader.go | 1 + pkg/retry/errors.go | 1 + 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dm/worker/dm-worker.toml b/dm/worker/dm-worker.toml index 657efa4c98..2440d0773e 100644 --- a/dm/worker/dm-worker.toml +++ b/dm/worker/dm-worker.toml @@ -7,6 +7,4 @@ log-file = "dm-worker.log" #dm-worker listen address worker-addr = ":8262" advertise-addr = "127.0.0.1:8262" -join = "127.0.0.1:8291" - - +join = "127.0.0.1:8261" diff --git a/dm/worker/server_test.go b/dm/worker/server_test.go index f901f33169..b76155d383 100644 --- a/dm/worker/server_test.go +++ b/dm/worker/server_test.go @@ -95,7 +95,7 @@ func createMockETCD(dir string, host string) (*embed.Etcd, error) { func (t *testServer) TestServer(c *C) { var ( - masterAddr = "127.0.0.1:8291" + masterAddr = "127.0.0.1:8261" workerAddr1 = "127.0.0.1:8262" keepAliveTTL = int64(1) ) @@ -224,7 +224,7 @@ func (t *testServer) TestServer(c *C) { func (t *testServer) TestWatchSourceBoundEtcdCompact(c *C) { var ( - masterAddr = "127.0.0.1:8291" + masterAddr = "127.0.0.1:8261" keepAliveTTL = int64(1) startRev = int64(1) ) diff --git a/dm/worker/task_checker_test.go b/dm/worker/task_checker_test.go index dc199e79e0..835af6edda 100644 --- a/dm/worker/task_checker_test.go +++ b/dm/worker/task_checker_test.go @@ -294,10 +294,10 @@ func (s *testTaskCheckerSuite) TestIsResumableError(c *check.C) { // only DM new error is checked {&tmysql.SQLError{1105, "unsupported modify column length 20 is less than origin 40", tmysql.DefaultMySQLState}, true}, {&tmysql.SQLError{1105, "unsupported drop integer primary key", tmysql.DefaultMySQLState}, true}, - {nil, true}, {terror.ErrDBExecuteFailed.Generate("file test.t3.sql: execute statement failed: USE `test_abc`;: context canceled"), true}, {terror.ErrDBExecuteFailed.Delegate(&tmysql.SQLError{1105, "unsupported modify column length 20 is less than origin 40", tmysql.DefaultMySQLState}, "alter table t modify col varchar(20)"), false}, {terror.ErrDBExecuteFailed.Delegate(&tmysql.SQLError{1105, "unsupported drop integer primary key", tmysql.DefaultMySQLState}, "alter table t drop column id"), false}, + {terror.ErrDBExecuteFailed.Delegate(&tmysql.SQLError{1067, "Invalid default value for 'ct'", tmysql.DefaultMySQLState}, "CREATE TABLE `tbl` (`c1` int(11) NOT NULL,`ct` datetime NOT NULL DEFAULT '0000-00-00 00:00:00' COMMENT '创建时间',PRIMARY KEY (`c1`)) ENGINE=InnoDB DEFAULT CHARSET=latin1"), false}, {terror.ErrDBExecuteFailed.Delegate(errors.New("Error 1062: Duplicate entry '5' for key 'PRIMARY'")), false}, {terror.ErrDBExecuteFailed.Delegate(errors.New("INSERT INTO `db`.`tbl` (`c1`,`c2`) VALUES (?,?);: Error 1406: Data too long for column 'c2' at row 1")), false}, // real error is generated by `Delegate` and multiple `Annotatef`, we use `New` to simplify it diff --git a/dm/worker/worker_test.go b/dm/worker/worker_test.go index ee5d9149bf..a7a9d88e2b 100644 --- a/dm/worker/worker_test.go +++ b/dm/worker/worker_test.go @@ -90,7 +90,7 @@ func (t *testServer) TestTaskAutoResume(c *C) { taskName = "sub-task-name" port = 8263 ) - hostName := "127.0.0.1:8291" + hostName := "127.0.0.1:8261" etcdDir := c.MkDir() ETCD, err := createMockETCD(etcdDir, "host://"+hostName) c.Assert(err, IsNil) diff --git a/loader/loader.go b/loader/loader.go index fa56a95222..ea3c01664c 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -483,6 +483,7 @@ func (l *Loader) Process(ctx context.Context, pr chan pb.ProcessResult) { err := l.Restore(newCtx) close(l.runFatalChan) // Restore returned, all potential fatal sent to l.runFatalChan + cancel() // cancel the goroutines created in `Restore`. failpoint.Inject("dontWaitWorkerExit", func(_ failpoint.Value) { l.logCtx.L().Info("", zap.String("failpoint", "dontWaitWorkerExit")) diff --git a/pkg/retry/errors.go b/pkg/retry/errors.go index 4b3caf4bf7..e4190c6109 100644 --- a/pkg/retry/errors.go +++ b/pkg/retry/errors.go @@ -30,6 +30,7 @@ var ( "unsupported modify collate", "unsupported drop integer primary key", "Unsupported collation", + "Invalid default value for", } // UnsupportedDMLMsgs list the error messages of some un-recoverable DML, which is used in task auto recovery From 05fc73139cba3ad292d9c65c1c35ad29678fe187 Mon Sep 17 00:00:00 2001 From: Xuecheng Zhang Date: Wed, 17 Jun 2020 16:16:25 +0800 Subject: [PATCH 2/2] CHANGELOG: add release notes for v1.0.6 (#748) --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e7d0f9ad4..b46943c2dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,45 @@ All notable changes to this project will be documented in this file. +## [1.0.6] 2020-06-17 + +### Improvements + +- Support the original plaintext passwords for upstream and downstream databases +- Support configuring session variables for DM’s connections to upstream and downstream databases +- Remove the call stack information in some error messages returned by the `query-status` command when the data migration task encounters an exception +- Filter out the items that pass the precheck from the message returned when the precheck of the data migration task fails + +### Bug fixes + +- Fix the issue that the data migration task is not automatically paused and the error cannot be identified by executing the `query-status` command if an error occurs when the load unit creates a table +- Fix possible DM-worker panics when data migration tasks run simultaneously +- Fix the issue that the existing data migration task cannot be automatically restarted when the DM-worker process is restarted if the `enable-heartbeat` parameter of the task is set to `true` +- Fix the issue that the shard DDL conflict error may not be returned after the task is resumed +- Fix the issue that the `replicate lag` information is displayed incorrectly for an initial period of time when the `enable-heartbeat` parameter of the data migration task is set to `true` +- Fix the issue that `replicate lag` cannot be calculated using the heartbeat information when `lower_case_table_names` is set to `1` in the upstream database +- Disable the meaningless auto-resume tasks triggered by the `unsupported collation` error during data migration + +### Action required + +- When upgrading from a previous version, note that you must upgrade all DM components (dmctl/DM-master/DM-worker) together + +### Detailed Bug Fixes and Changes + +- Support the original plaintext passwords for upstream and downstream databases [#676](https://github.com/pingcap/dm/pull/676) +- Support configuring session variables for DM’s connections to upstream and downstream databases [#692](https://github.com/pingcap/dm/pull/692) +- Remove the call stack information in some error messages returned by the `query-status` command when the data migration task encounters an exception [#733](https://github.com/pingcap/dm/pull/733) [#747](https://github.com/pingcap/dm/pull/747) +- Filter out the items that pass the precheck from the message returned when the precheck of the data migration task fails [#730](https://github.com/pingcap/dm/pull/730) +- Fix the issue that the data migration task is not automatically paused and the error cannot be identified by executing the `query-status` command if an error occurs when the load unit creates a table [#747](https://github.com/pingcap/dm/pull/747) +- Fix possible DM-worker panics when data migration tasks run simultaneously [#710](https://github.com/pingcap/dm/pull/710) +- Fix the issue that the existing data migration task cannot be automatically restarted when the DM-worker process is restarted if the `enable-heartbeat` parameter of the task is set to `true` [#739](https://github.com/pingcap/dm/pull/739) +- Fix the issue that the shard DDL conflict error may not be returned after the task is resumed [#739](https://github.com/pingcap/dm/pull/739) [#742](https://github.com/pingcap/dm/pull/742) +- Fix the issue that the `replicate lag` information is displayed incorrectly for an initial period of time when the `enable-heartbeat` parameter of the data migration task is set to `true` [#704](https://github.com/pingcap/dm/pull/704) +- Fix the issue that `replicate lag` cannot be calculated using the heartbeat information when `lower_case_table_names` is set to `1` in the upstream database [#704](https://github.com/pingcap/dm/pull/704) +- Disable the meaningless auto-resume tasks triggered by the `unsupported collation` error during data migration [#735](https://github.com/pingcap/dm/pull/735) +- Optimize some logs [#660](https://github.com/pingcap/dm/pull/660) [#724](https://github.com/pingcap/dm/pull/724) [#738](https://github.com/pingcap/dm/pull/738) + + ## [1.0.5] 2020-04-27 ### Improvements