From f8ece5b996b567e47ceeff7b9f1c5625fa3fe827 Mon Sep 17 00:00:00 2001 From: luancheng Date: Tue, 20 Aug 2019 18:05:52 +0800 Subject: [PATCH 01/50] define conn split workerConn and upstreamConn in syncer split conn and db unify conn in syncer and loader --- dm/config/subtask.go | 22 +++ loader/checkpoint.go | 17 +- loader/checkpoint_test.go | 7 +- loader/db.go | 59 +++++-- loader/loader.go | 38 ++-- pkg/{baseconn/conn.go => conn/baseconn.go} | 80 ++------- .../conn_test.go => conn/baseconn_test.go} | 16 +- pkg/conn/basedb.go | 90 ++++++++++ pkg/conn/basedb_test.go | 59 +++++++ pkg/retry/errors.go | 13 ++ pkg/retry/strategy_test.go | 7 + syncer/checkpoint.go | 45 ++--- syncer/checkpoint_test.go | 8 +- syncer/db.go | 162 ++++++++++++------ syncer/db_test.go | 13 +- syncer/online_ddl.go | 27 +-- syncer/sharding_group.go | 26 +-- syncer/syncer.go | 100 +++++------ syncer/syncer_test.go | 43 +++-- 19 files changed, 548 insertions(+), 284 deletions(-) rename pkg/{baseconn/conn.go => conn/baseconn.go} (71%) rename pkg/{baseconn/conn_test.go => conn/baseconn_test.go} (89%) create mode 100644 pkg/conn/basedb.go create mode 100644 pkg/conn/basedb_test.go diff --git a/dm/config/subtask.go b/dm/config/subtask.go index 082165a5bb..47a36a8f24 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -44,6 +44,26 @@ var ( defaultMaxAllowedPacket = 64 * 1024 * 1024 // 64MiB, equal to TiDB's default ) +// RawDBConfig contains some low level database config +type RawDBConfig struct { + MaxIdleConns int + ReadTimeout string + WriteTimeout string +} + +// DefaultRawDBConfig returns a default raw database config +func DefaultRawDBConfig(readTimeout string, maxIdleConns ...int) *RawDBConfig { + if len(maxIdleConns) == 0 { + return &RawDBConfig{ + ReadTimeout: readTimeout, + } + } + return &RawDBConfig{ + MaxIdleConns: maxIdleConns[0], + ReadTimeout: readTimeout, + } +} + // DBConfig is the DB configuration. type DBConfig struct { Host string `toml:"host" json:"host" yaml:"host"` @@ -51,6 +71,8 @@ type DBConfig struct { User string `toml:"user" json:"user" yaml:"user"` Password string `toml:"password" json:"-" yaml:"password"` // omit it for privacy MaxAllowedPacket *int `toml:"max-allowed-packet" json:"max-allowed-packet" yaml:"max-allowed-packet"` + + RawDBCfg *RawDBConfig `toml:"-" json:"-" yaml:"-"` } func (db *DBConfig) String() string { diff --git a/loader/checkpoint.go b/loader/checkpoint.go index 1b3e24d4c6..94413b270f 100644 --- a/loader/checkpoint.go +++ b/loader/checkpoint.go @@ -20,7 +20,9 @@ import ( "time" "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" + "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "go.uber.org/zap" @@ -63,7 +65,8 @@ type CheckPoint interface { // RemoteCheckPoint implements CheckPoint by saving status in remote database system, mostly in TiDB. 
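// Reviewer note: after this change the loader checkpoint holds two handles,
// the owning *conn.BaseDB (the connection pool) and one pinned *WorkerConn
// used for its statements; Close in the hunk further down must release both,
// the connection first and then the pool.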
type RemoteCheckPoint struct { - conn *Conn // NOTE: use dbutil in tidb-tools later + db *conn.BaseDB + conn *WorkerConn // NOTE: use dbutil in tidb-tools later id string schema string table string @@ -73,7 +76,7 @@ type RemoteCheckPoint struct { } func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id string) (CheckPoint, error) { - conn, err := createConn(cfg) + db, conn, err := createConn(tctx.Context(), cfg) if err != nil { return nil, err } @@ -81,6 +84,7 @@ func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s newtctx := tctx.WithLogger(tctx.L().WithFields(zap.String("component", "remote checkpoint"))) cp := &RemoteCheckPoint{ + db: db, conn: conn, id: id, restoringFiles: make(map[string]map[string]FilePosSet), @@ -293,7 +297,14 @@ func (cp *RemoteCheckPoint) Init(filename string, endPos int64) error { // Close implements CheckPoint.Close func (cp *RemoteCheckPoint) Close() { - closeConn(cp.conn) + err := cp.conn.Close() + if err != nil { + cp.tctx.L().Error("close checkpoint connection error", log.ShortError(err)) + } + err = cp.db.Close() + if err != nil { + cp.tctx.L().Error("close checkpoint db error", log.ShortError(err)) + } } // GenSQL implements CheckPoint.GenSQL diff --git a/loader/checkpoint_test.go b/loader/checkpoint_test.go index c388665a0b..65bf724617 100644 --- a/loader/checkpoint_test.go +++ b/loader/checkpoint_test.go @@ -113,9 +113,12 @@ func (t *testCheckPointSuite) TestForDB(c *C) { c.Assert(count, Equals, len(cases)) // update checkpoints - conn, err := createConn(t.cfg) + db, conn, err := createConn(tctx.Context(), t.cfg) c.Assert(err, IsNil) - defer closeConn(conn) + defer func() { + conn.Close() + db.Close() + }() for _, cs := range cases { sql2 := cp.GenSQL(cs.filename, cs.endPos) err = conn.executeSQL(tctx, []string{sql2}) diff --git a/loader/db.go b/loader/db.go index ea9dff035a..f8c4af09b8 100644 --- a/loader/db.go +++ b/loader/db.go @@ -14,8 +14,8 @@ package loader import ( + "context" "database/sql" - "fmt" "strconv" "strings" "time" @@ -27,7 +27,7 @@ import ( "go.uber.org/zap" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/pkg/baseconn" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/retry" @@ -35,13 +35,13 @@ import ( "github.com/pingcap/dm/pkg/utils" ) -// Conn represents a live DB connection -type Conn struct { +// WorkerConn represents a live DB connection +type WorkerConn struct { cfg *config.SubTaskConfig - baseConn *baseconn.BaseConn + baseConn *conn.BaseConn } -func (conn *Conn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { +func (conn *WorkerConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { if conn == nil || conn.baseConn == nil { return nil, terror.ErrDBUnExpect.Generate("database connection not valid") } @@ -90,7 +90,7 @@ func (conn *Conn) querySQL(ctx *tcontext.Context, query string, args ...interfac return ret.(*sql.Rows), nil } -func (conn *Conn) executeSQL(ctx *tcontext.Context, queries []string, args ...[]interface{}) error { +func (conn *WorkerConn) executeSQL(ctx *tcontext.Context, queries []string, args ...[]interface{}) error { if len(queries) == 0 { return nil } @@ -149,23 +149,44 @@ func (conn *Conn) executeSQL(ctx *tcontext.Context, queries []string, args ...[] return err } -func createConn(cfg *config.SubTaskConfig) (*Conn, error) { - dbDSN := 
fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4&maxAllowedPacket=%d", - cfg.To.User, cfg.To.Password, cfg.To.Host, cfg.To.Port, *cfg.To.MaxAllowedPacket) - baseConn, err := baseconn.NewBaseConn(dbDSN, &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()) - if err != nil { - return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) +// Close release db connection resource, return it to BaseDB.db connection pool +func (conn *WorkerConn) Close() error { + if conn == nil || conn.baseConn == nil { + return nil } - - return &Conn{baseConn: baseConn, cfg: cfg}, nil + return conn.baseConn.Close() } -func closeConn(conn *Conn) error { - if conn.baseConn == nil { - return nil +func createConn(ctx context.Context, cfg *config.SubTaskConfig) (*conn.BaseDB, *WorkerConn, error) { + baseDB, err := conn.DefaultDBProvider.Apply(cfg.To) + if err != nil { + return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) } + baseConn, err := baseDB.GetBaseConn(ctx) + if err != nil { + return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) + } + return baseDB, &WorkerConn{baseConn: baseConn, cfg: cfg}, nil +} - return terror.DBErrorAdapt(conn.baseConn.Close(), terror.ErrDBDriverError) +func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*WorkerConn, error) { + baseDB, err := conn.DefaultDBProvider.Apply(cfg.To) + if err != nil { + return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) + } + conns := make([]*WorkerConn, 0, workerCount) + for i := 0; i < workerCount; i++ { + baseConn, err := baseDB.GetBaseConn(tctx.Context()) + if err != nil { + err := baseDB.Close() + if err != nil { + tctx.L().Error("failed to close baseDB") + } + return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) + } + conns = append(conns, &WorkerConn{baseConn: baseConn, cfg: cfg}) + } + return baseDB, conns, nil } func isErrDBExists(err error) bool { diff --git a/loader/loader.go b/loader/loader.go index fc3ffccd72..abe8c33798 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/unit" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" fr "github.com/pingcap/dm/pkg/func-rollback" "github.com/pingcap/dm/pkg/log" @@ -55,7 +56,6 @@ type DataFiles []string // Tables2DataFiles represent all data files of a table collection as a map type Tables2DataFiles map[string]DataFiles - type dataJob struct { sql string schema string @@ -77,7 +77,7 @@ type Worker struct { id int cfg *config.SubTaskConfig checkPoint CheckPoint - conn *Conn + conn *WorkerConn wg sync.WaitGroup jobQueue chan *dataJob loader *Loader @@ -89,18 +89,13 @@ type Worker struct { // NewWorker returns a Worker. 
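// Reviewer note: NewWorker below no longer dials its own connection; it
// borrows loader.toDBConns[id], which Loader.Init now creates up front via
// createConns, so the err result in its signature is never set on this path.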
func NewWorker(loader *Loader, id int) (worker *Worker, err error) { - conn, err := createConn(loader.cfg) - if err != nil { - return nil, err - } - ctctx := loader.tctx.WithLogger(loader.tctx.L().WithFields(zap.Int("worker ID", id))) return &Worker{ id: id, cfg: loader.cfg, checkPoint: loader.checkPoint, - conn: conn, + conn: loader.toDBConns[id], jobQueue: make(chan *dataJob, jobCount), loader: loader, tctx: ctctx, @@ -115,7 +110,7 @@ func (w *Worker) Close() { close(w.jobQueue) w.wg.Wait() - closeConn(w.conn) + w.conn.Close() } func (w *Worker) run(ctx context.Context, fileJobQueue chan *fileJob, workerWg *sync.WaitGroup, runFatalChan chan *pb.ProcessError) { @@ -342,6 +337,9 @@ type Loader struct { pool []*Worker closed sync2.AtomicBool + toDB *conn.BaseDB + toDBConns []*WorkerConn + totalDataSize sync2.AtomicInt64 totalFileCount sync2.AtomicInt64 // schema + table + data finishedDataSize sync2.AtomicInt64 @@ -409,6 +407,11 @@ func (l *Loader) Init() (err error) { } } + l.toDB, l.toDBConns, err = createConns(l.tctx, l.cfg, l.cfg.PoolSize) + if err != nil { + return err + } + return nil } @@ -556,6 +559,10 @@ func (l *Loader) Close() { } l.stopLoad() + err := l.toDB.Close() + if err != nil { + l.tctx.L().Error("close toDB error", log.ShortError(err)) + } l.checkPoint.Close() l.closed.Set(true) } @@ -855,7 +862,7 @@ func (l *Loader) prepare() error { } // restoreSchema creates schema -func (l *Loader) restoreSchema(conn *Conn, sqlFile, schema string) error { +func (l *Loader) restoreSchema(conn *WorkerConn, sqlFile, schema string) error { err := l.restoreStructure(conn, sqlFile, schema, "") if err != nil { if isErrDBExists(err) { @@ -868,7 +875,7 @@ func (l *Loader) restoreSchema(conn *Conn, sqlFile, schema string) error { } // restoreTable creates table -func (l *Loader) restoreTable(conn *Conn, sqlFile, schema, table string) error { +func (l *Loader) restoreTable(conn *WorkerConn, sqlFile, schema, table string) error { err := l.restoreStructure(conn, sqlFile, schema, table) if err != nil { if isErrTableExists(err) { @@ -881,7 +888,7 @@ func (l *Loader) restoreTable(conn *Conn, sqlFile, schema, table string) error { } // restoreStruture creates schema or table -func (l *Loader) restoreStructure(conn *Conn, sqlFile string, schema string, table string) error { +func (l *Loader) restoreStructure(conn *WorkerConn, sqlFile string, schema string, table string) error { f, err := os.Open(sqlFile) if err != nil { return terror.ErrLoadUnitReadSchemaFile.Delegate(err) @@ -968,11 +975,14 @@ func fetchMatchedLiteral(ctx *tcontext.Context, router *router.Table, schema, ta func (l *Loader) restoreData(ctx context.Context) error { begin := time.Now() - conn, err := createConn(l.cfg) + db, conn, err := createConn(ctx, l.cfg) if err != nil { return err } - defer conn.baseConn.Close() + defer func() { + conn.baseConn.Close() + db.Close() + }() dispatchMap := make(map[string]*fileJob) diff --git a/pkg/baseconn/conn.go b/pkg/conn/baseconn.go similarity index 71% rename from pkg/baseconn/conn.go rename to pkg/conn/baseconn.go index 423031e8a0..1cbfd2c780 100644 --- a/pkg/baseconn/conn.go +++ b/pkg/conn/baseconn.go @@ -11,61 +11,33 @@ // See the License for the specific language governing permissions and // limitations under the License. 
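Before the baseconn.go hunks themselves, it may help to see the connection model this series moves to, reduced to plain database/sql: one *sql.DB pool per unit, with each worker pinning a dedicated *sql.Conn from it. The sketch below is a minimal standalone reading of createConns in loader/db.go; the package and function names are illustrative, not DM's.

package pool

import (
	"context"
	"database/sql"

	_ "github.com/go-sql-driver/mysql"
)

// workerConns opens one shared pool and pins count dedicated connections
// from it, one per worker.
func workerConns(ctx context.Context, dsn string, count int) (*sql.DB, []*sql.Conn, error) {
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		return nil, nil, err
	}
	conns := make([]*sql.Conn, 0, count)
	for i := 0; i < count; i++ {
		c, err := db.Conn(ctx) // pins a single connection out of db's pool
		if err != nil {
			db.Close() // shut the pool down on failure (DM logs this error)
			return nil, nil, err
		}
		conns = append(conns, c)
	}
	return db, conns, nil
}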
-package baseconn +package conn import ( "database/sql" - "go.uber.org/zap" - tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/retry" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" + + "go.uber.org/zap" ) // BaseConn wraps a connection to DB type BaseConn struct { - DB *sql.DB - - // for reset - DSN string + DBConn *sql.Conn RetryStrategy retry.Strategy - - RawDBCfg *RawDBConfig -} - -// RawDBConfig contains some low level database config -type RawDBConfig struct { - MaxIdleConns int -} - -// DefaultRawDBConfig returns a default raw database config -func DefaultRawDBConfig() *RawDBConfig { - return &RawDBConfig{ - MaxIdleConns: 2, - } } -// NewBaseConn builds BaseConn to connect real DB -func NewBaseConn(dbDSN string, strategy retry.Strategy, rawDBCfg *RawDBConfig) (*BaseConn, error) { - db, err := sql.Open("mysql", dbDSN) - if err != nil { - return nil, terror.ErrDBDriverError.Delegate(err) - } - // set max idle connection limit before any database call - db.SetMaxIdleConns(rawDBCfg.MaxIdleConns) - err = db.Ping() - if err != nil { - db.Close() - return nil, terror.ErrDBDriverError.Delegate(err) - } +// newBaseConn builds BaseConn to connect real DB +func newBaseConn(conn *sql.Conn, strategy retry.Strategy) (*BaseConn, error) { if strategy == nil { strategy = &retry.FiniteRetryStrategy{} } - return &BaseConn{db, dbDSN, strategy, rawDBCfg}, nil + return &BaseConn{conn, strategy}, nil } // SetRetryStrategy set retry strategy for baseConn @@ -77,42 +49,16 @@ func (conn *BaseConn) SetRetryStrategy(strategy retry.Strategy) error { return nil } -// ResetConn generates new *DB with new connection pool to take place old one -func (conn *BaseConn) ResetConn(tctx *tcontext.Context) error { - if conn == nil { - return terror.ErrDBUnExpect.Generate("database connection not valid") - } - db, err := sql.Open("mysql", conn.DSN) - if err != nil { - return terror.ErrDBDriverError.Delegate(err) - } - // set max idle connection limit before any database call - db.SetMaxIdleConns(conn.RawDBCfg.MaxIdleConns) - err = db.Ping() - if err != nil { - db.Close() - return terror.ErrDBDriverError.Delegate(err) - } - if conn.DB != nil { - err := conn.DB.Close() - if err != nil { - tctx.L().Warn("reset connection", log.ShortError(err)) - } - } - conn.DB = db - return nil -} - // QuerySQL defines query statement, and connect to real DB func (conn *BaseConn) QuerySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { - if conn == nil || conn.DB == nil { + if conn == nil || conn.DBConn == nil { return nil, terror.ErrDBUnExpect.Generate("database connection not valid") } tctx.L().Debug("query statement", zap.String("query", utils.TruncateString(query, -1)), zap.String("argument", utils.TruncateInterface(args, -1))) - rows, err := conn.DB.QueryContext(tctx.Context(), query, args...) + rows, err := conn.DBConn.QueryContext(tctx.Context(), query, args...) 
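// Reviewer note: unlike DB.QueryContext, Conn.QueryContext is pinned to this
// single connection, so database/sql cannot transparently retry a bad
// connection on another one from the pool here; that gap is what the new
// IsConnectionError/ResetConn handling in syncer/db.go compensates for.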
if err != nil { tctx.L().Error("query statement failed", @@ -132,11 +78,11 @@ func (conn *BaseConn) ExecuteSQLWithIgnoreError(tctx *tcontext.Context, ignoreEr if len(queries) == 0 { return 0, nil } - if conn == nil || conn.DB == nil { + if conn == nil || conn.DBConn == nil { return 0, terror.ErrDBUnExpect.Generate("database connection not valid") } - txn, err := conn.DB.Begin() + txn, err := conn.DBConn.BeginTx(tctx.Context(), nil) if err != nil { return 0, terror.ErrDBExecuteFailed.Delegate(err, "begin") @@ -202,8 +148,8 @@ func (conn *BaseConn) ApplyRetryStrategy(tctx *tcontext.Context, params retry.Pa // Close release DB resource func (conn *BaseConn) Close() error { - if conn == nil || conn.DB == nil { + if conn == nil || conn.DBConn == nil { return nil } - return terror.ErrDBUnExpect.Delegate(conn.DB.Close(), "close") + return terror.ErrDBUnExpect.Delegate(conn.DBConn.Close(), "close") } diff --git a/pkg/baseconn/conn_test.go b/pkg/conn/baseconn_test.go similarity index 89% rename from pkg/baseconn/conn_test.go rename to pkg/conn/baseconn_test.go index 4863f592e8..dd57c51e06 100644 --- a/pkg/baseconn/conn_test.go +++ b/pkg/conn/baseconn_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package baseconn +package conn import ( "errors" @@ -35,15 +35,12 @@ type testBaseConnSuite struct { } func (t *testBaseConnSuite) TestBaseConn(c *C) { - baseConn, err := NewBaseConn("error dsn", nil, DefaultRawDBConfig()) - c.Assert(terror.ErrDBDriverError.Equal(err), IsTrue) + baseConn, err := newBaseConn(nil, nil) + c.Assert(err, IsNil) tctx := tcontext.Background() - err = baseConn.ResetConn(tctx) - c.Assert(terror.ErrDBUnExpect.Equal(err), IsTrue) - err = baseConn.SetRetryStrategy(nil) - c.Assert(terror.ErrDBUnExpect.Equal(err), IsTrue) + c.Assert(err, IsNil) _, err = baseConn.QuerySQL(tctx, "select 1") c.Assert(terror.ErrDBUnExpect.Equal(err), IsTrue) @@ -53,7 +50,10 @@ func (t *testBaseConnSuite) TestBaseConn(c *C) { db, mock, err := sqlmock.New() c.Assert(err, IsNil) - baseConn = &BaseConn{db, "", nil, DefaultRawDBConfig()} + dbConn, err := db.Conn(tctx.Context()) + c.Assert(err, IsNil) + + baseConn = &BaseConn{dbConn, nil} err = baseConn.SetRetryStrategy(&retry.FiniteRetryStrategy{}) c.Assert(err, IsNil) diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go new file mode 100644 index 0000000000..5cfdabe60a --- /dev/null +++ b/pkg/conn/basedb.go @@ -0,0 +1,90 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
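One consequence of switching from conn.DB.Begin() to conn.DBConn.BeginTx(tctx.Context(), nil) above: the transaction is now guaranteed to run on the wrapped *sql.Conn and can be cancelled through the context. A minimal sketch of that shape, assuming nothing beyond database/sql (execBatch is an illustrative name, not DM's API):

package txsketch

import (
	"context"
	"database/sql"
)

// execBatch applies queries atomically on one pinned connection, rolling
// back and surfacing the first error, as the execute path above does.
func execBatch(ctx context.Context, c *sql.Conn, queries []string) error {
	txn, err := c.BeginTx(ctx, nil) // bound to this exact connection
	if err != nil {
		return err
	}
	for _, q := range queries {
		if _, err := txn.ExecContext(ctx, q); err != nil {
			txn.Rollback() // best effort; the Exec error wins
			return err
		}
	}
	return txn.Commit()
}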
+
+package conn
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+
+	"github.com/pingcap/dm/dm/config"
+	"github.com/pingcap/dm/pkg/retry"
+	"github.com/pingcap/dm/pkg/terror"
+)
+
+// DBProvider provides a BaseDB instance
+type DBProvider interface {
+	Apply(config config.DBConfig) (*BaseDB, error)
+}
+
+type defaultDBProvider struct {
+}
+
+// DefaultDBProvider is the global instance of DBProvider
+var DefaultDBProvider DBProvider
+
+func init() {
+	DefaultDBProvider = &defaultDBProvider{}
+}
+
+func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
+	dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4&interpolateParams=true&maxAllowedPacket=%d",
+		config.User, config.Password, config.Host, config.Port, *config.MaxAllowedPacket)
+
+	maxIdleConn := 0
+	rawCfg := config.RawDBCfg
+	if rawCfg != nil {
+		if rawCfg.ReadTimeout != "" {
+			dsn += fmt.Sprintf("&readTimeout=%s", rawCfg.ReadTimeout)
+		}
+		if rawCfg.WriteTimeout != "" {
+			dsn += fmt.Sprintf("&writeTimeout=%s", rawCfg.WriteTimeout)
+		}
+		maxIdleConn = rawCfg.MaxIdleConns
+	}
+	db, err := sql.Open("mysql", dsn)
+	if err != nil {
+		return nil, terror.ErrDBDriverError.Delegate(err)
+	}
+
+	// set the idle limit only after sql.Open has succeeded; on error db is nil
+	if maxIdleConn > 0 {
+		db.SetMaxIdleConns(maxIdleConn)
+	}
+
+	return &BaseDB{db, &retry.FiniteRetryStrategy{}}, nil
+}
+
+// BaseDB wraps *sql.DB
+type BaseDB struct {
+	DB *sql.DB
+
+	Retry retry.Strategy
+}
+
+// GetBaseConn retrieves a *BaseConn which has its own retry strategy
+func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
+	conn, err := d.DB.Conn(ctx)
+	if err != nil {
+		return nil, terror.ErrDBDriverError.Delegate(err)
+	}
+	return newBaseConn(conn, d.Retry)
+}
+
+// Close releases the DB resource
+func (d *BaseDB) Close() error {
+	if d == nil || d.DB == nil {
+		return nil
+	}
+	return d.DB.Close()
+}
diff --git a/pkg/conn/basedb_test.go b/pkg/conn/basedb_test.go
new file mode 100644
index 0000000000..f9e8c78730
--- /dev/null
+++ b/pkg/conn/basedb_test.go
@@ -0,0 +1,59 @@
+// Copyright 2019 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package conn
+
+import (
+	"github.com/DATA-DOG/go-sqlmock"
+	. 
"github.com/pingcap/check" + tcontext "github.com/pingcap/dm/pkg/context" + "github.com/pingcap/dm/pkg/retry" +) + +var _ = Suite(&testBaseDBSuite{}) + +type testBaseDBSuite struct { +} + +func (t *testBaseDBSuite) TestBaseDB(c *C) { + db, mock, err := sqlmock.New() + c.Assert(err, IsNil) + + baseDB := &BaseDB{db, &retry.FiniteRetryStrategy{}} + + tctx := tcontext.Background() + + dbConn, err := baseDB.GetBaseConn(tctx.Context()) + c.Assert(dbConn, NotNil) + c.Assert(err, IsNil) + + mock.ExpectQuery("select 1").WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("1")) + rows, err := dbConn.QuerySQL(tctx, "select 1") + c.Assert(err, IsNil) + ids := make([]int, 0, 1) + for rows.Next() { + var id int + err = rows.Scan(&id) + c.Assert(err, IsNil) + ids = append(ids, id) + } + c.Assert(len(ids), Equals, 1) + c.Assert(ids[0], Equals, 1) + + mock.ExpectBegin() + mock.ExpectExec("create database test").WillReturnResult(sqlmock.NewResult(1, 1)) + mock.ExpectCommit() + affected, err := dbConn.ExecuteSQL(tctx, []string{"create database test"}) + c.Assert(err, IsNil) + c.Assert(affected, Equals, 1) +} diff --git a/pkg/retry/errors.go b/pkg/retry/errors.go index fb2e10a2f8..dae5919b62 100644 --- a/pkg/retry/errors.go +++ b/pkg/retry/errors.go @@ -14,6 +14,7 @@ package retry import ( + "database/sql/driver" "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" tmysql "github.com/pingcap/parser/mysql" @@ -35,3 +36,15 @@ func IsRetryableError(err error) bool { } return false } + +// IsConnectionError tells whether this error should reconnect to Database +func IsConnectionError(err error) bool { + err = errors.Cause(err) + switch err { + case driver.ErrBadConn: + return true + case mysql.ErrInvalidConn: + return true + } + return false +} diff --git a/pkg/retry/strategy_test.go b/pkg/retry/strategy_test.go index 9e09e238ff..466fe1ba07 100644 --- a/pkg/retry/strategy_test.go +++ b/pkg/retry/strategy_test.go @@ -79,6 +79,13 @@ func (t *testStrategySuite) TestFiniteRetryStrategy(c *C) { c.Assert(opCount, Equals, 0) c.Assert(terror.ErrDBInvalidConn.Equal(err), IsTrue) + params.IsRetryableFn = func(int, error) bool { + return IsConnectionError(err) + } + _, opCount, err = strategy.Apply(ctx, params, operateFn) + c.Assert(opCount, Equals, 3) + c.Assert(terror.ErrDBInvalidConn.Equal(err), IsTrue) + retValue := "success" operateFn = func(*tcontext.Context) (interface{}, error) { return retValue, nil diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go index 1d19c48c5d..26fff07b1d 100644 --- a/syncer/checkpoint.go +++ b/syncer/checkpoint.go @@ -19,16 +19,16 @@ import ( "sync" "time" - "github.com/pingcap/failpoint" - tmysql "github.com/pingcap/parser/mysql" - "github.com/siddontang/go-mysql/mysql" - "go.uber.org/zap" - "github.com/pingcap/dm/dm/config" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" + + "github.com/pingcap/failpoint" + tmysql "github.com/pingcap/parser/mysql" + "github.com/siddontang/go-mysql/mysql" + "go.uber.org/zap" ) /* @@ -122,7 +122,7 @@ func (b *binlogPoint) String() string { // because, when restarting to continue the sync, all sharding DDLs must try-sync again type CheckPoint interface { // Init initializes the CheckPoint - Init(conn *Conn) error + Init(conn *WorkerConn) error // Close closes the CheckPoint Close() @@ -186,7 +186,7 @@ type RemoteCheckPoint struct { cfg *config.SubTaskConfig - db *Conn + dbConn *WorkerConn schema string // schema name, set through task config table 
string // table name, now it's task name id string // checkpoint ID, now it is `source-id` @@ -226,15 +226,16 @@ func NewRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s } // Init implements CheckPoint.Init -func (cp *RemoteCheckPoint) Init(conn *Conn) error { +func (cp *RemoteCheckPoint) Init(conn *WorkerConn) error { if conn != nil { - cp.db = conn + cp.dbConn = conn } else { - db, err := createConn(cp.cfg, cp.cfg.To, maxCheckPointTimeout) + cp.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) + dbConn, err := createConn(cp.tctx, cp.cfg, cp.cfg.To) if err != nil { return err } - cp.db = db + cp.dbConn = dbConn } return cp.prepare() @@ -242,12 +243,12 @@ func (cp *RemoteCheckPoint) Init(conn *Conn) error { // Close implements CheckPoint.Close func (cp *RemoteCheckPoint) Close() { - closeConns(cp.tctx, cp.db) + closeConns(cp.tctx, cp.dbConn) } // ResetConn implements CheckPoint.ResetConn func (cp *RemoteCheckPoint) ResetConn() error { - return cp.db.ResetConn(cp.tctx) + return cp.dbConn.ResetConn(cp.tctx) } // Clear implements CheckPoint.Clear @@ -258,7 +259,7 @@ func (cp *RemoteCheckPoint) Clear() error { // delete all checkpoints sql2 := fmt.Sprintf("DELETE FROM `%s`.`%s` WHERE `id` = '%s'", cp.schema, cp.table, cp.id) args := make([]interface{}, 0) - _, err := cp.db.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) + _, err := cp.dbConn.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) if err != nil { return err } @@ -317,7 +318,7 @@ func (cp *RemoteCheckPoint) DeleteTablePoint(sourceSchema, sourceTable string) e // delete checkpoint sql2 := fmt.Sprintf("DELETE FROM `%s`.`%s` WHERE `id` = '%s' AND `cp_schema` = '%s' AND `cp_table` = '%s'", cp.schema, cp.table, cp.id, sourceSchema, sourceTable) args := make([]interface{}, 0) - _, err := cp.db.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) + _, err := cp.dbConn.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) if err != nil { return err } @@ -403,7 +404,7 @@ func (cp *RemoteCheckPoint) FlushPointsExcept(exceptTables [][]string, extraSQLs args = append(args, extraArgs[i]) } - _, err := cp.db.executeSQL(cp.tctx, sqls, args...) + _, err := cp.dbConn.executeSQL(cp.tctx, sqls, args...) if err != nil { return err } @@ -466,7 +467,7 @@ func (cp *RemoteCheckPoint) prepare() error { func (cp *RemoteCheckPoint) createSchema() error { sql2 := fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS `%s`", cp.schema) args := make([]interface{}, 0) - _, err := cp.db.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) + _, err := cp.dbConn.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) cp.tctx.L().Info("create checkpoint schema", zap.String("statement", sql2)) return err } @@ -485,7 +486,7 @@ func (cp *RemoteCheckPoint) createTable() error { UNIQUE KEY uk_id_schema_table (id, cp_schema, cp_table) )`, tableName) args := make([]interface{}, 0) - _, err := cp.db.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) + _, err := cp.dbConn.executeSQL(cp.tctx, []string{sql2}, [][]interface{}{args}...) 
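// Reviewer note on the Load hunk just below: the rows handle is now closed by
// a nil-guarded defer installed before the error check, because the
// LoadCheckpointFailed failpoint can overwrite err after querySQL has already
// returned live rows; the old `defer rows.Close()` placed after the check
// would be skipped on that early return and leak the handle.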
cp.tctx.L().Info("create checkpoint table", zap.String("statement", sql2)) return err } @@ -493,7 +494,12 @@ func (cp *RemoteCheckPoint) createTable() error { // Load implements CheckPoint.Load func (cp *RemoteCheckPoint) Load() error { query := fmt.Sprintf("SELECT `cp_schema`, `cp_table`, `binlog_name`, `binlog_pos`, `is_global` FROM `%s`.`%s` WHERE `id`='%s'", cp.schema, cp.table, cp.id) - rows, err := cp.db.querySQL(cp.tctx, query) + rows, err := cp.dbConn.querySQL(cp.tctx, query) + defer func() { + if rows != nil { + rows.Close() + } + }() failpoint.Inject("LoadCheckpointFailed", func(val failpoint.Value) { err = tmysql.NewErr(uint16(val.(int))) @@ -503,7 +509,6 @@ func (cp *RemoteCheckPoint) Load() error { if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } - defer rows.Close() // checkpoints in DB have higher priority // if don't want to use checkpoint in DB, set `remove-previous-checkpoint` to `true` diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go index 3463968706..93ad117552 100644 --- a/syncer/checkpoint_test.go +++ b/syncer/checkpoint_test.go @@ -21,7 +21,7 @@ import ( "strings" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/pkg/baseconn" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/retry" @@ -90,8 +90,10 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) { mock.ExpectExec(clearCheckPointSQL).WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectCommit() - // pass sqlmock baseConn directly - conn := &Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} + dbConn, err := db.Conn(tcontext.Background().Context()) + c.Assert(err, IsNil) + conn := &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + err = cp.Init(conn) c.Assert(err, IsNil) cp.Clear() diff --git a/syncer/db.go b/syncer/db.go index 6e2114080c..ff1cadf93e 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -20,7 +20,7 @@ import ( "time" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/pkg/baseconn" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/log" @@ -69,45 +69,88 @@ type binlogSize struct { size int64 } -// Conn represents a live DB connection -type Conn struct { - cfg *config.SubTaskConfig - baseConn *baseconn.BaseConn +func createBaseDB(dbCfg config.DBConfig) (*conn.BaseDB, error) { + db, err := conn.DefaultDBProvider.Apply(dbCfg) + if err != nil { + return nil, err + } + return db, nil } -// ResetConn reset baseConn.*DB's connection pool -func (conn *Conn) ResetConn(tctx *tcontext.Context) error { - if conn.baseConn == nil { - return terror.ErrDBUnExpect.Generate("database base connection not valid") - } - return conn.baseConn.ResetConn(tctx) +// UpStreamConn connect to upstream DB, use *sql.DB instead of *sql.Conn +type UpStreamConn struct { + cfg *config.SubTaskConfig + // TODO change to BaseDB + DB *sql.DB +} + +func (conn *UpStreamConn) getMasterStatus(flavor string) (mysql.Position, gtid.Set, error) { + return utils.GetMasterStatus(conn.DB, flavor) +} + +func (conn *UpStreamConn) getServerUUID(flavor string) (string, error) { + return utils.GetServerUUID(conn.DB, flavor) +} + +func (conn *UpStreamConn) getParser(ansiQuotesMode bool) (*parser.Parser, error) { + return utils.GetParser(conn.DB, ansiQuotesMode) } -func (conn *Conn) getMasterStatus(flavor string) (mysql.Position, gtid.Set, error) { - return 
utils.GetMasterStatus(conn.baseConn.DB, flavor) +func (conn *UpStreamConn) killConn(connID uint32) error { + return utils.KillConn(conn.DB, connID) } -func (conn *Conn) getServerUUID(flavor string) (string, error) { - return utils.GetServerUUID(conn.baseConn.DB, flavor) +func (conn *UpStreamConn) fetchAllDoTables(bw *filter.Filter) (map[string][]string, error) { + return utils.FetchAllDoTables(conn.DB, bw) } -func (conn *Conn) getParser(ansiQuotesMode bool) (*parser.Parser, error) { - return utils.GetParser(conn.baseConn.DB, ansiQuotesMode) +func (conn *UpStreamConn) countBinaryLogsSize(pos mysql.Position) (int64, error) { + return countBinaryLogsSize(pos, conn.DB) } -func (conn *Conn) killConn(connID uint32) error { - return utils.KillConn(conn.baseConn.DB, connID) +func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpStreamConn, error) { + baseDB, err := createBaseDB(dbCfg) + if err != nil { + return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeUpstream) + } + return &UpStreamConn{DB: baseDB.DB, cfg: cfg}, nil } -func (conn *Conn) fetchAllDoTables(bw *filter.Filter) (map[string][]string, error) { - return utils.FetchAllDoTables(conn.baseConn.DB, bw) +func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) { + err := conn.DB.Close() + if err != nil { + tctx.L().Error("fail to close baseConn connection", log.ShortError(err)) + } } -func (conn *Conn) countBinaryLogsSize(pos mysql.Position) (int64, error) { - return countBinaryLogsSize(pos, conn.baseConn.DB) +// WorkerConn represents a live DB connection +type WorkerConn struct { + cfg *config.SubTaskConfig + baseConn *conn.BaseConn + + baseDB *conn.BaseDB } -func (conn *Conn) querySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { +// ResetConn reset one worker connection from specify *BaseDB +func (conn *WorkerConn) ResetConn(tctx *tcontext.Context) error { + if conn == nil || conn.baseDB == nil { + return terror.ErrDBDriverError.Generate("database not valid") + } + dbConn, err := conn.baseDB.GetBaseConn(tctx.Context()) + if err != nil { + return err + } + if conn.baseConn != nil { + err = conn.baseConn.Close() + if err != nil { + return err + } + } + conn.baseConn = dbConn + return nil +} + +func (conn *WorkerConn) querySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { if conn == nil || conn.baseConn == nil { return nil, terror.ErrDBUnExpect.Generate("database base connection not valid") } @@ -145,7 +188,7 @@ func (conn *Conn) querySQL(tctx *tcontext.Context, query string, args ...interfa return ret.(*sql.Rows), nil } -func (conn *Conn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func(error) bool, queries []string, args ...[]interface{}) (int, error) { +func (conn *WorkerConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func(error) bool, queries []string, args ...[]interface{}) (int, error) { if len(queries) == 0 { return 0, nil } @@ -159,6 +202,16 @@ func (conn *Conn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func( FirstRetryDuration: retryTimeout, BackoffStrategy: retry.Stable, IsRetryableFn: func(retryTime int, err error) bool { + if retry.IsConnectionError(err) { + err := conn.ResetConn(tctx) + if err != nil { + tctx.L().Warn("reset connection failed", zap.Int("retry", retryTime), + zap.String("queries", utils.TruncateInterface(queries, -1)), + zap.String("arguments", utils.TruncateInterface(args, -1))) + return false + } + return true + } if 
retry.IsRetryableError(err) { tctx.L().Warn("execute statements", zap.Int("retry", retryTime), zap.String("queries", utils.TruncateInterface(queries, -1)), @@ -176,6 +229,7 @@ func (conn *Conn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func( func(ctx *tcontext.Context) (interface{}, error) { startTime := time.Now() ret, err := conn.baseConn.ExecuteSQLWithIgnoreError(ctx, ignoreError, queries, args...) + if err == nil { cost := time.Since(startTime) txnHistogram.WithLabelValues(conn.cfg.Name).Observe(cost.Seconds()) @@ -196,57 +250,59 @@ func (conn *Conn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func( return ret.(int), nil } -func (conn *Conn) executeSQL(tctx *tcontext.Context, queries []string, args ...[]interface{}) (int, error) { +func (conn *WorkerConn) executeSQL(tctx *tcontext.Context, queries []string, args ...[]interface{}) (int, error) { return conn.executeSQLWithIgnore(tctx, nil, queries, args...) } -func createBaseConn(dbCfg config.DBConfig, timeout string, rawDBCfg *baseconn.RawDBConfig) (*baseconn.BaseConn, error) { - dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4&interpolateParams=true&readTimeout=%s&maxAllowedPacket=%d", - dbCfg.User, dbCfg.Password, dbCfg.Host, dbCfg.Port, timeout, *dbCfg.MaxAllowedPacket) - baseConn, err := baseconn.NewBaseConn(dbDSN, &retry.FiniteRetryStrategy{}, rawDBCfg) +func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*WorkerConn, error) { + baseDB, err := createBaseDB(dbCfg) if err != nil { - return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError) + return nil, err } - return baseConn, nil -} - -func createConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig, timeout string) (*Conn, error) { - baseConn, err := createBaseConn(dbCfg, timeout, baseconn.DefaultRawDBConfig()) + baseConn, err := baseDB.GetBaseConn(tctx.Context()) if err != nil { - return nil, err + return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) } - return &Conn{baseConn: baseConn, cfg: cfg}, nil + return &WorkerConn{baseDB: baseDB, baseConn: baseConn, cfg: cfg}, nil } -func createConns(cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int, timeout string) ([]*Conn, error) { - dbs := make([]*Conn, 0, count) - - rawDBCfg := &baseconn.RawDBConfig{ - MaxIdleConns: cfg.SyncerConfig.WorkerCount, - } - baseConn, err := createBaseConn(dbCfg, timeout, rawDBCfg) +func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) ([]*WorkerConn, error) { + conns := make([]*WorkerConn, 0, count) + db, err := createBaseDB(dbCfg) if err != nil { return nil, err } for i := 0; i < count; i++ { - // TODO use *sql.Conn instead of *sql.DB - // share db by all conns - bc := &baseconn.BaseConn{baseConn.DB, baseConn.DSN, baseConn.RetryStrategy, rawDBCfg} - dbs = append(dbs, &Conn{baseConn: bc, cfg: cfg}) + dbConn, err := db.GetBaseConn(tctx.Context()) + if err != nil { + closeConns(tctx, conns...) 
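// Reviewer note: if GetBaseConn fails on the very first iteration, conns is
// still empty, the closeConns call above walks nothing, and the shared
// BaseDB created at the top of this function is never closed.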
+ return nil, terror.WithScope(terror.ErrDBBadConn.Delegate(err), terror.ScopeDownstream) + } + conns = append(conns, &WorkerConn{baseDB: db, baseConn: dbConn, cfg: cfg}) } - return dbs, nil + return conns, nil } -func closeConns(tctx *tcontext.Context, conns ...*Conn) { +func closeConns(tctx *tcontext.Context, conns ...*WorkerConn) { + var db *conn.BaseDB for _, conn := range conns { + if db == nil { + db = conn.baseDB + } err := conn.baseConn.Close() if err != nil { tctx.L().Error("fail to close baseConn connection", log.ShortError(err)) } } + if db != nil { + err := db.Close() + if err != nil { + tctx.L().Error("fail to close baseDB", log.ShortError(err)) + } + } } -func getTableIndex(tctx *tcontext.Context, conn *Conn, table *table) error { +func getTableIndex(tctx *tcontext.Context, conn *WorkerConn, table *table) error { if table.schema == "" || table.name == "" { return terror.ErrDBUnExpect.Generate("schema/table is empty") } @@ -303,7 +359,7 @@ func getTableIndex(tctx *tcontext.Context, conn *Conn, table *table) error { return nil } -func getTableColumns(tctx *tcontext.Context, conn *Conn, table *table) error { +func getTableColumns(tctx *tcontext.Context, conn *WorkerConn, table *table) error { if table.schema == "" || table.name == "" { return terror.ErrDBUnExpect.Generate("schema/table is empty") } diff --git a/syncer/db_test.go b/syncer/db_test.go index c11ce86e8b..8f0222be62 100644 --- a/syncer/db_test.go +++ b/syncer/db_test.go @@ -29,7 +29,7 @@ import ( "go.uber.org/zap" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/pkg/baseconn" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/retry" @@ -145,9 +145,12 @@ func (s *testDBSuite) TestBinaryLogs(c *C) { func (s *testSyncerSuite) TestExecuteSQLSWithIgnore(c *C) { db, mock, err := sqlmock.New() - conn := &Conn{ - baseConn: &baseconn.BaseConn{ - DB: db, + c.Assert(err, IsNil) + dbConn, err := db.Conn(context.Background()) + c.Assert(err, IsNil) + conn := &WorkerConn{ + baseConn: &conn.BaseConn{ + DBConn: dbConn, RetryStrategy: &retry.FiniteRetryStrategy{}, }, cfg: &config.SubTaskConfig{ @@ -238,7 +241,7 @@ func (s *testDBSuite) TestTimezone(c *C) { s.resetBinlogSyncer(c) // we should not use `sql.DB.Exec` to do query which depends on session variables - // because `sql.DB.Exec` will choose a underlying Conn for every query from the connection pool + // because `sql.DB.Exec` will choose a underlying WorkerConn for every query from the connection pool // and different Conn using different session // ref: `sql.DB.Conn` // and `set @@global` is also not reasonable, because it can not affect sessions already exist diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go index c117ff9612..3b1e132659 100644 --- a/syncer/online_ddl.go +++ b/syncer/online_ddl.go @@ -20,12 +20,12 @@ import ( "strings" "sync" - "github.com/pingcap/parser/ast" - "github.com/pingcap/tidb-tools/pkg/filter" - "github.com/pingcap/dm/dm/config" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/terror" + + "github.com/pingcap/parser/ast" + "github.com/pingcap/tidb-tools/pkg/filter" ) var ( @@ -79,7 +79,7 @@ type OnlineDDLStorage struct { cfg *config.SubTaskConfig - db *Conn + dbConn *WorkerConn schema string // schema name, set through task config table string // table name, now it's task name id string // now it is `server-id` used as MySQL slave @@ -106,11 +106,12 @@ func NewOnlineDDLStorage(newtctx *tcontext.Context, cfg 
*config.SubTaskConfig) * // Init initials online handler func (s *OnlineDDLStorage) Init() error { - db, err := createConn(s.cfg, s.cfg.To, maxCheckPointTimeout) + s.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) + dbConn, err := createConn(s.tctx, s.cfg, s.cfg.To) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } - s.db = db + s.dbConn = dbConn err = s.prepare() if err != nil { @@ -126,7 +127,7 @@ func (s *OnlineDDLStorage) Load() error { defer s.Unlock() query := fmt.Sprintf("SELECT `ghost_schema`, `ghost_table`, `ddls` FROM `%s`.`%s` WHERE `id`='%s'", s.schema, s.table, s.id) - rows, err := s.db.querySQL(s.tctx, query) + rows, err := s.dbConn.querySQL(s.tctx, query) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } @@ -204,7 +205,7 @@ func (s *OnlineDDLStorage) Save(ghostSchema, ghostTable, realSchema, realTable, } query := fmt.Sprintf("REPLACE INTO `%s`.`%s`(`id`,`ghost_schema`, `ghost_table`, `ddls`) VALUES ('%s', '%s', '%s', '%s')", s.schema, s.table, s.id, ghostSchema, ghostTable, escapeSingleQuote(string(ddlsBytes))) - _, err = s.db.executeSQL(s.tctx, []string{query}) + _, err = s.dbConn.executeSQL(s.tctx, []string{query}) return terror.WithScope(err, terror.ScopeDownstream) } @@ -220,7 +221,7 @@ func (s *OnlineDDLStorage) Delete(ghostSchema, ghostTable string) error { // delete all checkpoints sql := fmt.Sprintf("DELETE FROM `%s`.`%s` WHERE `id` = '%s' and `ghost_schema` = '%s' and `ghost_table` = '%s'", s.schema, s.table, s.id, ghostSchema, ghostTable) - _, err := s.db.executeSQL(s.tctx, []string{sql}) + _, err := s.dbConn.executeSQL(s.tctx, []string{sql}) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } @@ -236,7 +237,7 @@ func (s *OnlineDDLStorage) Clear() error { // delete all checkpoints sql := fmt.Sprintf("DELETE FROM `%s`.`%s` WHERE `id` = '%s'", s.schema, s.table, s.id) - _, err := s.db.executeSQL(s.tctx, []string{sql}) + _, err := s.dbConn.executeSQL(s.tctx, []string{sql}) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } @@ -250,7 +251,7 @@ func (s *OnlineDDLStorage) Close() { s.Lock() defer s.Unlock() - closeConns(s.tctx, s.db) + closeConns(s.tctx, s.dbConn) } func (s *OnlineDDLStorage) prepare() error { @@ -266,7 +267,7 @@ func (s *OnlineDDLStorage) prepare() error { func (s *OnlineDDLStorage) createSchema() error { sql := fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS `%s`", s.schema) - _, err := s.db.executeSQL(s.tctx, []string{sql}) + _, err := s.dbConn.executeSQL(s.tctx, []string{sql}) return terror.WithScope(err, terror.ScopeDownstream) } @@ -280,7 +281,7 @@ func (s *OnlineDDLStorage) createTable() error { update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, UNIQUE KEY uk_id_schema_table (id, ghost_schema, ghost_table) )`, tableName) - _, err := s.db.executeSQL(s.tctx, []string{sql}) + _, err := s.dbConn.executeSQL(s.tctx, []string{sql}) return terror.WithScope(err, terror.ScopeDownstream) } diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go index cf224a873c..e394d9f6e9 100644 --- a/syncer/sharding_group.go +++ b/syncer/sharding_group.go @@ -75,14 +75,14 @@ import ( "strings" "sync" - "github.com/siddontang/go-mysql/mysql" - "go.uber.org/zap" - "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/terror" shardmeta "github.com/pingcap/dm/syncer/sharding-meta" + + "github.com/siddontang/go-mysql/mysql" + "go.uber.org/zap" ) // ShardingGroup 
represents a sharding DDL sync group @@ -401,7 +401,8 @@ type ShardingGroupKeeper struct { shardMetaSchema string shardMetaTable string - db *Conn + + dbConn *WorkerConn tctx *tcontext.Context } @@ -451,16 +452,17 @@ func (k *ShardingGroupKeeper) AddGroup(targetSchema, targetTable string, sourceI } // Init does initialization staff -func (k *ShardingGroupKeeper) Init(conn *Conn) error { +func (k *ShardingGroupKeeper) Init(conn *WorkerConn) error { k.clear() if conn != nil { - k.db = conn + k.dbConn = conn } else { - db, err := createConn(k.cfg, k.cfg.To, maxDDLConnectionTimeout) + k.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout) + dbConn, err := createConn(k.tctx, k.cfg, k.cfg.To) if err != nil { return err } - k.db = db + k.dbConn = dbConn } err := k.prepare() return err @@ -697,12 +699,12 @@ func (k *ShardingGroupKeeper) prepare() error { // Close closes sharding group keeper func (k *ShardingGroupKeeper) Close() { - closeConns(k.tctx, k.db) + closeConns(k.tctx, k.dbConn) } func (k *ShardingGroupKeeper) createSchema() error { stmt := fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS `%s`", k.shardMetaSchema) - _, err := k.db.executeSQL(k.tctx, []string{stmt}) + _, err := k.dbConn.executeSQL(k.tctx, []string{stmt}) k.tctx.L().Info("execute sql", zap.String("statement", stmt)) return terror.WithScope(err, terror.ScopeDownstream) } @@ -720,7 +722,7 @@ func (k *ShardingGroupKeeper) createTable() error { update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, UNIQUE KEY uk_source_id_table_id_source (source_id, target_table_id, source_table_id) )`, tableName) - _, err := k.db.executeSQL(k.tctx, []string{stmt}) + _, err := k.dbConn.executeSQL(k.tctx, []string{stmt}) k.tctx.L().Info("execute sql", zap.String("statement", stmt)) return terror.WithScope(err, terror.ScopeDownstream) } @@ -728,7 +730,7 @@ func (k *ShardingGroupKeeper) createTable() error { // LoadShardMeta implements CheckPoint.LoadShardMeta func (k *ShardingGroupKeeper) LoadShardMeta() (map[string]*shardmeta.ShardingMeta, error) { query := fmt.Sprintf("SELECT `target_table_id`, `source_table_id`, `active_index`, `is_global`, `data` FROM `%s`.`%s` WHERE `source_id`='%s'", k.shardMetaSchema, k.shardMetaTable, k.cfg.SourceID) - rows, err := k.db.querySQL(k.tctx, query) + rows, err := k.dbConn.querySQL(k.tctx, query) if err != nil { return nil, terror.WithScope(err, terror.ScopeDownstream) } diff --git a/syncer/syncer.go b/syncer/syncer.go index 1ebceefe01..bdc3303ee0 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -22,20 +22,6 @@ import ( "sync" "time" - "github.com/pingcap/errors" - "github.com/pingcap/failpoint" - "github.com/pingcap/parser" - "github.com/pingcap/parser/ast" - bf "github.com/pingcap/tidb-tools/pkg/binlog-filter" - cm "github.com/pingcap/tidb-tools/pkg/column-mapping" - "github.com/pingcap/tidb-tools/pkg/dbutil" - "github.com/pingcap/tidb-tools/pkg/filter" - router "github.com/pingcap/tidb-tools/pkg/table-router" - "github.com/siddontang/go-mysql/mysql" - "github.com/siddontang/go-mysql/replication" - "github.com/siddontang/go/sync2" - "go.uber.org/zap" - "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/unit" @@ -50,6 +36,20 @@ import ( "github.com/pingcap/dm/pkg/utils" sm "github.com/pingcap/dm/syncer/safe-mode" operator "github.com/pingcap/dm/syncer/sql-operator" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/parser" + "github.com/pingcap/parser/ast" + bf 
"github.com/pingcap/tidb-tools/pkg/binlog-filter" + cm "github.com/pingcap/tidb-tools/pkg/column-mapping" + "github.com/pingcap/tidb-tools/pkg/dbutil" + "github.com/pingcap/tidb-tools/pkg/filter" + router "github.com/pingcap/tidb-tools/pkg/table-router" + "github.com/siddontang/go-mysql/mysql" + "github.com/siddontang/go-mysql/replication" + "github.com/siddontang/go/sync2" + "go.uber.org/zap" ) var ( @@ -146,9 +146,10 @@ type Syncer struct { cacheColumns map[string][]string // table columns cache: `target-schema`.`target-table` -> column names list genColsCache *GenColCache - fromDB *Conn - toDBs []*Conn - ddlDB *Conn + fromDB *UpStreamConn + + toDBConns []*WorkerConn + ddlDBConn *WorkerConn jobs []chan *job jobsClosed sync2.AtomicBool @@ -401,7 +402,7 @@ func (s *Syncer) Init() (err error) { // initShardingGroups initializes sharding groups according to source MySQL, filter rules and router rules // NOTE: now we don't support modify router rules after task has started -func (s *Syncer) initShardingGroups(conn *Conn) error { +func (s *Syncer) initShardingGroups(conn *WorkerConn) error { // fetch tables from source and filter them sourceTables, err := s.fromDB.fetchAllDoTables(s.bwList) if err != nil { @@ -506,19 +507,14 @@ func (s *Syncer) reset() { func (s *Syncer) resetDBs() error { var err error - // toDBs share the same `*sql.DB` in underlying `*baseconn.BaseConn`, currently the `BaseConn.ResetConn` - // can only reset the `*sql.DB` and point to the new `*sql.DB`, it is hard to reset all the `*sql.DB` by - // calling `BaseConn.ResetConn` once. On the other side if we simply change the underlying value of a - // `*sql.DB` by `*conn.DB = *db`, there exists some data race and invalid memory address in db driver. - // So we use the close and recreate way here. - closeConns(s.tctx, s.toDBs...) 
- s.toDBs, err = createConns(s.cfg, s.cfg.To, s.cfg.WorkerCount, maxDMLConnectionTimeout) - if err != nil { - return terror.WithScope(err, terror.ScopeDownstream) + for i := 0; i < len(s.toDBConns); i++ { + err = s.toDBConns[i].ResetConn(s.tctx) + if err != nil { + return terror.WithScope(err, terror.ScopeDownstream) + } } - s.tctx.L().Info("createDBs", zap.String("toDBs baseConn", fmt.Sprintf("%p", s.toDBs[0].baseConn.DB))) - err = s.ddlDB.ResetConn(s.tctx) + err = s.ddlDBConn.ResetConn(s.tctx) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } @@ -636,7 +632,7 @@ func (s *Syncer) clearAllTables() { s.genColsCache.reset() } -func (s *Syncer) getTableFromDB(db *Conn, schema string, name string) (*table, error) { +func (s *Syncer) getTableFromDB(db *WorkerConn, schema string, name string) (*table, error) { table := &table{} table.schema = schema table.name = name @@ -667,7 +663,7 @@ func (s *Syncer) getTable(schema string, table string) (*table, []string, error) return value, s.cacheColumns[key], nil } - db := s.toDBs[len(s.toDBs)-1] + db := s.toDBConns[len(s.toDBConns)-1] t, err := s.getTableFromDB(db, schema, table) if err != nil { return nil, nil, err @@ -856,7 +852,7 @@ func (s *Syncer) flushCheckPoints() error { return nil } -func (s *Syncer) sync(ctx *tcontext.Context, queueBucket string, db *Conn, jobChan chan *job) { +func (s *Syncer) sync(ctx *tcontext.Context, queueBucket string, db *WorkerConn, jobChan chan *job) { defer s.wg.Done() idx := 0 @@ -1055,7 +1051,7 @@ func (s *Syncer) Run(ctx context.Context) (err error) { go func(i int, n string) { ctx2, cancel := context.WithCancel(ctx) ctctx := s.tctx.WithContext(ctx2) - s.sync(ctctx, n, s.toDBs[i], s.jobs[i]) + s.sync(ctctx, n, s.toDBConns[i], s.jobs[i]) cancel() }(i, name) } @@ -1065,7 +1061,7 @@ func (s *Syncer) Run(ctx context.Context) (err error) { go func() { ctx2, cancel := context.WithCancel(ctx) ctctx := s.tctx.WithContext(ctx2) - s.sync(ctctx, adminQueueName, s.ddlDB, s.jobs[s.cfg.WorkerCount]) + s.sync(ctctx, adminQueueName, s.ddlDBConn, s.jobs[s.cfg.WorkerCount]) cancel() }() @@ -2021,23 +2017,28 @@ func (s *Syncer) printStatus(ctx context.Context) { func (s *Syncer) createDBs() error { var err error - s.fromDB, err = createConn(s.cfg, s.cfg.From, maxDMLConnectionTimeout) + dbCfg := s.cfg.From + dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout) + s.fromDB, err = createUpStreamConn(s.cfg, dbCfg) if err != nil { - return terror.WithScope(err, terror.ScopeUpstream) + return err } - s.toDBs = make([]*Conn, 0, s.cfg.WorkerCount) - s.toDBs, err = createConns(s.cfg, s.cfg.To, s.cfg.WorkerCount, maxDMLConnectionTimeout) + dbCfg = s.cfg.To + dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout, s.cfg.WorkerCount) + s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) if err != nil { - closeConns(s.tctx, s.fromDB) // release resources acquired before return with error - return terror.WithScope(err, terror.ScopeDownstream) + closeUpstreamConn(s.tctx, s.fromDB) // release resources acquired before return with error + return err } // baseConn for ddl - s.ddlDB, err = createConn(s.cfg, s.cfg.To, maxDDLConnectionTimeout) + dbCfg = s.cfg.To + dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout) + s.ddlDBConn, err = createConn(s.tctx, s.cfg, dbCfg) if err != nil { - closeConns(s.tctx, s.fromDB) - closeConns(s.tctx, s.toDBs...) 
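Taken together with the IsConnectionError branch added to executeSQLWithIgnore, the reset loop above gives the syncer per-connection recovery: a dead connection is handed back to the pool, a fresh one is pinned, and the statement is retried. A reduced sketch of that swap, assuming only database/sql (resetConn is an illustrative name, not DM's API):

package resetsketch

import (
	"context"
	"database/sql"
)

// resetConn replaces one bad pinned connection with a fresh one from the
// same pool, mirroring WorkerConn.ResetConn in syncer/db.go.
func resetConn(ctx context.Context, db *sql.DB, old *sql.Conn) (*sql.Conn, error) {
	fresh, err := db.Conn(ctx)
	if err != nil {
		return nil, err
	}
	if old != nil {
		old.Close() // hand the broken connection back for disposal
	}
	return fresh, nil
}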
- return terror.WithScope(err, terror.ScopeDownstream) + closeUpstreamConn(s.tctx, s.fromDB) + closeConns(s.tctx, s.toDBConns...) + return err } return nil @@ -2045,9 +2046,9 @@ func (s *Syncer) createDBs() error { // closeConns closes all opened DBs, rollback for createConns func (s *Syncer) closeDBs() { - closeConns(s.tctx, s.fromDB) - closeConns(s.tctx, s.toDBs...) - closeConns(s.tctx, s.ddlDB) + closeUpstreamConn(s.tctx, s.fromDB) + closeConns(s.tctx, s.toDBConns...) + closeConns(s.tctx, s.ddlDBConn) } // record skip ddl/dml sqls' position @@ -2393,12 +2394,13 @@ func (s *Syncer) ExecuteDDL(ctx context.Context, execReq *pb.ExecDDLRequest) (<- func (s *Syncer) UpdateFromConfig(cfg *config.SubTaskConfig) error { s.Lock() defer s.Unlock() - s.fromDB.baseConn.Close() + s.fromDB.DB.Close() s.cfg.From = cfg.From var err error - s.fromDB, err = createConn(s.cfg, s.cfg.From, maxDMLConnectionTimeout) + s.cfg.From.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout) + s.fromDB, err = createUpStreamConn(s.cfg, s.cfg.From) if err != nil { s.tctx.L().Error("fail to create baseConn connection", log.ShortError(err)) return err diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index 190e7877c1..5286320996 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -25,8 +25,8 @@ import ( "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" - "github.com/pingcap/dm/pkg/baseconn" "github.com/pingcap/dm/pkg/binlog/event" + "github.com/pingcap/dm/pkg/conn" "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/log" parserpkg "github.com/pingcap/dm/pkg/parser" @@ -934,9 +934,11 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { } syncer := NewSyncer(s.cfg) - // use upstream db as mock downstream - syncer.fromDB = &Conn{baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} - syncer.toDBs = []*Conn{{baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}} + // use upstream dbConn as mock downstream + dbConn, err := db.Conn(context.Background()) + c.Assert(err, IsNil) + syncer.fromDB = &UpStreamConn{DB: db} + syncer.toDBConns = []*WorkerConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} syncer.reset() streamer, err := syncer.streamerProducer.generateStreamer(pos) @@ -1215,15 +1217,22 @@ func (s *testSyncerSuite) TestSharding(c *C) { // make syncer write to mock baseConn syncer := NewSyncer(s.cfg) - // fromDB mocks upstream db, db mocks downstream db - syncer.fromDB = &Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{fromDB, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} - syncer.toDBs = []*Conn{{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}} - syncer.ddlDB = &Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} + ctx := context.Background() + // fromDB mocks upstream dbConn, dbConn mocks downstream dbConn + syncer.fromDB = &UpStreamConn{cfg: s.cfg, DB: fromDB} + dbConn, err := db.Conn(ctx) + c.Assert(err, IsNil) + syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} + syncer.ddlDBConn = &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} // mock syncer.Init() function, because we need to pass mock dbs to different members' init syncer.genRouter() - syncer.initShardingGroups(&Conn{cfg: s.cfg, baseConn: 
&baseconn.BaseConn{shardGroupDB, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}) - syncer.checkpoint.Init(&Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{checkPointDB, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}) + shardGroupDBConn, err := shardGroupDB.Conn(ctx) + c.Assert(err, IsNil) + checkPointDBConn, err := checkPointDB.Conn(ctx) + c.Assert(err, IsNil) + syncer.initShardingGroups(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}}) + syncer.checkpoint.Init(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}) syncer.reset() events := append(createEvents, s.generateEvents(_case.testEvents, c)...) syncer.streamerProducer = &MockStreamProducer{events} @@ -1281,7 +1290,6 @@ func (s *testSyncerSuite) TestSharding(c *C) { resultCh := make(chan pb.ProcessResult) s.mockCheckPointFlush(checkPointMock) - checkPointMock.ExpectClose() go syncer.Process(ctx, resultCh) @@ -1332,7 +1340,10 @@ func (s *testSyncerSuite) TestRun(c *C) { db, mock, err := sqlmock.New() c.Assert(err, IsNil) + dbConn, err := db.Conn(context.Background()) + c.Assert(err, IsNil) checkPointDB, checkPointMock, err := sqlmock.New() + checkPointDBConn, err := checkPointDB.Conn(context.Background()) c.Assert(err, IsNil) testJobs.jobs = testJobs.jobs[:0] @@ -1362,10 +1373,10 @@ func (s *testSyncerSuite) TestRun(c *C) { s.cfg.DisableCausality = false syncer := NewSyncer(s.cfg) - syncer.fromDB = &Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} - syncer.toDBs = []*Conn{{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}, - {cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}} - syncer.ddlDB = &Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{db, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}} + syncer.fromDB = &UpStreamConn{cfg: s.cfg, DB: db} + syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}, + {cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} + syncer.ddlDBConn = &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} c.Assert(syncer.Type(), Equals, pb.UnitType_Sync) syncer.columnMapping, err = cm.NewMapping(s.cfg.CaseSensitive, s.cfg.ColumnMappingRules) @@ -1379,7 +1390,7 @@ func (s *testSyncerSuite) TestRun(c *C) { checkPointMock.ExpectExec(fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s_syncer_checkpoint`", s.cfg.MetaSchema, s.cfg.Name)).WillReturnResult(sqlmock.NewResult(1, 1)) checkPointMock.ExpectCommit() - syncer.checkpoint.Init(&Conn{cfg: s.cfg, baseConn: &baseconn.BaseConn{checkPointDB, "", &retry.FiniteRetryStrategy{}, baseconn.DefaultRawDBConfig()}}) + syncer.checkpoint.Init(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}) syncer.reset() events1 := mockBinlogEvents{ mockBinlogEvent{typ: DBCreate, args: []interface{}{"test_1"}}, From ba9c5889780ac26415b47518d2fa47b872b5cb75 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 16 Sep 2019 13:01:43 +0800 Subject: [PATCH 02/50] change upstream DB to BaseDB --- syncer/db.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/syncer/db.go b/syncer/db.go index ff1cadf93e..98c4b06d71 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -80,32 +80,31 @@ 
func createBaseDB(dbCfg config.DBConfig) (*conn.BaseDB, error) { // UpStreamConn connect to upstream DB, use *sql.DB instead of *sql.Conn type UpStreamConn struct { cfg *config.SubTaskConfig - // TODO change to BaseDB - DB *sql.DB + BaseDB *conn.BaseDB } func (conn *UpStreamConn) getMasterStatus(flavor string) (mysql.Position, gtid.Set, error) { - return utils.GetMasterStatus(conn.DB, flavor) + return utils.GetMasterStatus(conn.BaseDB.DB, flavor) } func (conn *UpStreamConn) getServerUUID(flavor string) (string, error) { - return utils.GetServerUUID(conn.DB, flavor) + return utils.GetServerUUID(conn.BaseDB.DB, flavor) } func (conn *UpStreamConn) getParser(ansiQuotesMode bool) (*parser.Parser, error) { - return utils.GetParser(conn.DB, ansiQuotesMode) + return utils.GetParser(conn.BaseDB.DB, ansiQuotesMode) } func (conn *UpStreamConn) killConn(connID uint32) error { - return utils.KillConn(conn.DB, connID) + return utils.KillConn(conn.BaseDB.DB, connID) } func (conn *UpStreamConn) fetchAllDoTables(bw *filter.Filter) (map[string][]string, error) { - return utils.FetchAllDoTables(conn.DB, bw) + return utils.FetchAllDoTables(conn.BaseDB.DB, bw) } func (conn *UpStreamConn) countBinaryLogsSize(pos mysql.Position) (int64, error) { - return countBinaryLogsSize(pos, conn.DB) + return countBinaryLogsSize(pos, conn.BaseDB.DB) } func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpStreamConn, error) { @@ -113,11 +112,11 @@ func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpSt if err != nil { return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeUpstream) } - return &UpStreamConn{DB: baseDB.DB, cfg: cfg}, nil + return &UpStreamConn{BaseDB: baseDB, cfg: cfg}, nil } func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) { - err := conn.DB.Close() + err := conn.BaseDB.Close() if err != nil { tctx.L().Error("fail to close baseConn connection", log.ShortError(err)) } From 25b7016602227097bf4e11a7803d26afb26a6c07 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 16 Sep 2019 13:12:32 +0800 Subject: [PATCH 03/50] update raw db config --- dm/config/subtask.go | 22 ++++++++++++++-------- syncer/db.go | 2 +- syncer/syncer.go | 4 +++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/dm/config/subtask.go b/dm/config/subtask.go index 47a36a8f24..d7b231f7c9 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -52,18 +52,24 @@ type RawDBConfig struct { } // DefaultRawDBConfig returns a default raw database config -func DefaultRawDBConfig(readTimeout string, maxIdleConns ...int) *RawDBConfig { - if len(maxIdleConns) == 0 { - return &RawDBConfig{ - ReadTimeout: readTimeout, - } - } +func DefaultRawDBConfig(readTimeout string) *RawDBConfig { return &RawDBConfig{ - MaxIdleConns: maxIdleConns[0], - ReadTimeout: readTimeout, + ReadTimeout: readTimeout, } } +// AddWriteTimeout adds writeTimeout for raw database +func (c *RawDBConfig) AddWriteTimeout(writeTimeout string) *RawDBConfig { + c.WriteTimeout = writeTimeout + return c +} + +// AddMaxIdleConns set maxIdleConns for raw database +func (c *RawDBConfig) AddMaxIdleConns(conns int) *RawDBConfig { + c.MaxIdleConns = conns + return c +} + // DBConfig is the DB configuration. 
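// NOTE: DefaultRawDBConfig drops its variadic argument here in favour of a small
// builder; an illustrative chained call (the timeout strings and idle count are
// hypothetical values, not defaults from this patch):
//   rawCfg := config.DefaultRawDBConfig("5m").
//       AddWriteTimeout("1m").
//       AddMaxIdleConns(4)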
type DBConfig struct { Host string `toml:"host" json:"host" yaml:"host"` diff --git a/syncer/db.go b/syncer/db.go index 98c4b06d71..54eba97fe6 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -79,7 +79,7 @@ func createBaseDB(dbCfg config.DBConfig) (*conn.BaseDB, error) { // UpStreamConn connect to upstream DB, use *sql.DB instead of *sql.Conn type UpStreamConn struct { - cfg *config.SubTaskConfig + cfg *config.SubTaskConfig BaseDB *conn.BaseDB } diff --git a/syncer/syncer.go b/syncer/syncer.go index bdc3303ee0..7ffecc2c2c 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -2025,7 +2025,9 @@ func (s *Syncer) createDBs() error { } dbCfg = s.cfg.To - dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout, s.cfg.WorkerCount) + dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout). + AddMaxIdleConns(s.cfg.WorkerCount) + s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) if err != nil { closeUpstreamConn(s.tctx, s.fromDB) // release resources acquired before return with error From 7ce2dac93a1db1ad22f63c10e39d7df52b327177 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 16 Sep 2019 13:41:34 +0800 Subject: [PATCH 04/50] fix test --- syncer/db.go | 1 + syncer/syncer.go | 2 +- syncer/syncer_test.go | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/syncer/db.go b/syncer/db.go index 54eba97fe6..d8f323c8bc 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -127,6 +127,7 @@ type WorkerConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn + // for workerConn to reset itself baseDB *conn.BaseDB } diff --git a/syncer/syncer.go b/syncer/syncer.go index 7ffecc2c2c..7bba10f60c 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -2396,7 +2396,7 @@ func (s *Syncer) ExecuteDDL(ctx context.Context, execReq *pb.ExecDDLRequest) (<- func (s *Syncer) UpdateFromConfig(cfg *config.SubTaskConfig) error { s.Lock() defer s.Unlock() - s.fromDB.DB.Close() + s.fromDB.BaseDB.Close() s.cfg.From = cfg.From diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index 5286320996..227b9e44a9 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -937,7 +937,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { // use upstream dbConn as mock downstream dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) - syncer.fromDB = &UpStreamConn{DB: db} + syncer.fromDB = &UpStreamConn{BaseDB: &conn.BaseDB{db, nil}} syncer.toDBConns = []*WorkerConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} syncer.reset() @@ -1219,7 +1219,7 @@ func (s *testSyncerSuite) TestSharding(c *C) { ctx := context.Background() // fromDB mocks upstream dbConn, dbConn mocks downstream dbConn - syncer.fromDB = &UpStreamConn{cfg: s.cfg, DB: fromDB} + syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{fromDB, nil}} dbConn, err := db.Conn(ctx) c.Assert(err, IsNil) syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} @@ -1373,7 +1373,7 @@ func (s *testSyncerSuite) TestRun(c *C) { s.cfg.DisableCausality = false syncer := NewSyncer(s.cfg) - syncer.fromDB = &UpStreamConn{cfg: s.cfg, DB: db} + syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{db, nil}} syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}, {cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} syncer.ddlDBConn = &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} From 
ecf7bf5b505d3bb2aac977b3285049108ef28998 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 16 Sep 2019 13:52:45 +0800 Subject: [PATCH 05/50] remove useless code --- syncer/syncer.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/syncer/syncer.go b/syncer/syncer.go index 7bba10f60c..0d8164a27c 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -916,11 +916,6 @@ func (s *Syncer) sync(ctx *tcontext.Context, queueBucket string, db *WorkerConn, idx++ if sqlJob.tp == ddl { - err = executeSQLs() - if err != nil { - fatalF(err, pb.ErrorType_ExecSQL) - continue - } if sqlJob.ddlExecItem != nil && sqlJob.ddlExecItem.req != nil && !sqlJob.ddlExecItem.req.Exec { s.tctx.L().Info("ignore sharding DDLs", zap.Strings("ddls", sqlJob.ddls)) From f579445b701d4b1cf8a6205b2427df0c59fa63d7 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 16 Sep 2019 15:25:47 +0800 Subject: [PATCH 06/50] add resetConnFn instead of baseDB in syncer.WorkerConn to reset --- syncer/checkpoint.go | 9 ++++-- syncer/db.go | 59 ++++++++++++++++++---------------------- syncer/online_ddl.go | 7 +++-- syncer/sharding_group.go | 7 +++-- syncer/syncer.go | 19 +++++++------ 5 files changed, 54 insertions(+), 47 deletions(-) diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go index 26fff07b1d..a1aa79ad50 100644 --- a/syncer/checkpoint.go +++ b/syncer/checkpoint.go @@ -20,6 +20,7 @@ import ( "time" "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" @@ -186,6 +187,7 @@ type RemoteCheckPoint struct { cfg *config.SubTaskConfig + db *conn.BaseDB dbConn *WorkerConn schema string // schema name, set through task config table string // table name, now it's task name @@ -231,10 +233,11 @@ func (cp *RemoteCheckPoint) Init(conn *WorkerConn) error { cp.dbConn = conn } else { cp.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) - dbConn, err := createConn(cp.tctx, cp.cfg, cp.cfg.To) + db, dbConn, err := createConn(cp.tctx, cp.cfg, cp.cfg.To) if err != nil { return err } + cp.db = db cp.dbConn = dbConn } @@ -243,12 +246,12 @@ func (cp *RemoteCheckPoint) Init(conn *WorkerConn) error { // Close implements CheckPoint.Close func (cp *RemoteCheckPoint) Close() { - closeConns(cp.tctx, cp.dbConn) + closeBaseDB(cp.tctx, cp.db) } // ResetConn implements CheckPoint.ResetConn func (cp *RemoteCheckPoint) ResetConn() error { - return cp.dbConn.ResetConn(cp.tctx) + return cp.dbConn.resetConn(cp.tctx) } // Clear implements CheckPoint.Clear diff --git a/syncer/db.go b/syncer/db.go index d8f323c8bc..19d87fa4f2 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -127,16 +127,16 @@ type WorkerConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn - // for workerConn to reset itself - baseDB *conn.BaseDB + // for workerConn to reset baseConn + resetConnFn func(*tcontext.Context) (*conn.BaseConn, error) } // ResetConn reset one worker connection from specify *BaseDB -func (conn *WorkerConn) ResetConn(tctx *tcontext.Context) error { - if conn == nil || conn.baseDB == nil { +func (conn *WorkerConn) resetConn(tctx *tcontext.Context) error { + if conn == nil { return terror.ErrDBDriverError.Generate("database not valid") } - dbConn, err := conn.baseDB.GetBaseConn(tctx.Context()) + dbConn, err := conn.resetConnFn(tctx) if err != nil { return err } @@ -203,7 +203,7 @@ func (conn *WorkerConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError BackoffStrategy: retry.Stable, IsRetryableFn: func(retryTime int, 
err error) bool { if retry.IsConnectionError(err) { - err := conn.ResetConn(tctx) + err := conn.resetConn(tctx) if err != nil { tctx.L().Warn("reset connection failed", zap.Int("retry", retryTime), zap.String("queries", utils.TruncateInterface(queries, -1)), @@ -254,51 +254,46 @@ func (conn *WorkerConn) executeSQL(tctx *tcontext.Context, queries []string, arg return conn.executeSQLWithIgnore(tctx, nil, queries, args...) } -func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*WorkerConn, error) { +func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*conn.BaseDB, *WorkerConn, error) { baseDB, err := createBaseDB(dbCfg) if err != nil { - return nil, err + return nil, nil, err } baseConn, err := baseDB.GetBaseConn(tctx.Context()) if err != nil { - return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) + return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) + } + resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) { + return baseDB.GetBaseConn(tctx.Context()) } - return &WorkerConn{baseDB: baseDB, baseConn: baseConn, cfg: cfg}, nil + return baseDB, &WorkerConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}, nil } -func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) ([]*WorkerConn, error) { +func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) (*conn.BaseDB, []*WorkerConn, error) { conns := make([]*WorkerConn, 0, count) db, err := createBaseDB(dbCfg) if err != nil { - return nil, err + return nil, nil, err } for i := 0; i < count; i++ { dbConn, err := db.GetBaseConn(tctx.Context()) if err != nil { - closeConns(tctx, conns...) 
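// NOTE: both factories now return the owning *conn.BaseDB alongside the worker
// connections, making pool ownership explicit at the call site; a caller-side
// sketch (error handling elided):
//   db, conns, err := createConns(tctx, cfg, cfg.To, cfg.WorkerCount)
//   defer closeBaseDB(tctx, db) // releases every conn created from db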
- return nil, terror.WithScope(terror.ErrDBBadConn.Delegate(err), terror.ScopeDownstream)
+ closeBaseDB(tctx, db)
+ return nil, nil, terror.WithScope(terror.ErrDBBadConn.Delegate(err), terror.ScopeDownstream)
}
- conns = append(conns, &WorkerConn{baseDB: db, baseConn: dbConn, cfg: cfg})
+ resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
+ return db.GetBaseConn(tctx.Context())
+ }
+ conns = append(conns, &WorkerConn{baseConn: dbConn, cfg: cfg, resetConnFn: resetConnFn})
}
- return conns, nil
+ return db, conns, nil
}

-func closeConns(tctx *tcontext.Context, conns ...*WorkerConn) {
- var db *conn.BaseDB
- for _, conn := range conns {
- if db == nil {
- db = conn.baseDB
- }
- err := conn.baseConn.Close()
- if err != nil {
- tctx.L().Error("fail to close baseConn connection", log.ShortError(err))
- }
- }
- if db != nil {
- err := db.Close()
- if err != nil {
- tctx.L().Error("fail to close baseDB", log.ShortError(err))
- }
+// close baseDB to release all connections generated from this baseDB, and the baseDB itself
+func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
+ err := baseDB.Close()
+ if err != nil {
+ tctx.L().Error("fail to close baseDB", log.ShortError(err))
}
}
diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go
index 3b1e132659..b89483fa3f 100644
--- a/syncer/online_ddl.go
+++ b/syncer/online_ddl.go
@@ -21,6 +21,7 @@ import (
"sync"

"github.com/pingcap/dm/dm/config"
+ "github.com/pingcap/dm/pkg/conn"
tcontext "github.com/pingcap/dm/pkg/context"
"github.com/pingcap/dm/pkg/terror"

@@ -79,6 +80,7 @@ type OnlineDDLStorage struct {

cfg *config.SubTaskConfig

+ db *conn.BaseDB
dbConn *WorkerConn
schema string // schema name, set through task config
table string // table name, now it's task name
@@ -107,10 +109,11 @@ func NewOnlineDDLStorage(newtctx *tcontext.Context, cfg *config.SubTaskConfig) *
// Init initials online handler
func (s *OnlineDDLStorage) Init() error {
s.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
- dbConn, err := createConn(s.tctx, s.cfg, s.cfg.To)
+ db, dbConn, err := createConn(s.tctx, s.cfg, s.cfg.To)
if err != nil {
return terror.WithScope(err, terror.ScopeDownstream)
}
+ s.db = db
s.dbConn = dbConn

err = s.prepare()
@@ -251,7 +254,7 @@ func (s *OnlineDDLStorage) Close() {
s.Lock()
defer s.Unlock()

- closeConns(s.tctx, s.dbConn)
+ closeBaseDB(s.tctx, s.db)
}

func (s *OnlineDDLStorage) prepare() error {
diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go
index e394d9f6e9..daf1720ae6 100644
--- a/syncer/sharding_group.go
+++ b/syncer/sharding_group.go
@@ -77,6 +77,7 @@ import (

"github.com/pingcap/dm/dm/config"
"github.com/pingcap/dm/dm/pb"
+ "github.com/pingcap/dm/pkg/conn"
tcontext "github.com/pingcap/dm/pkg/context"
"github.com/pingcap/dm/pkg/terror"
shardmeta "github.com/pingcap/dm/syncer/sharding-meta"
@@ -402,6 +403,7 @@ type ShardingGroupKeeper struct {
shardMetaSchema string
shardMetaTable string

+ db *conn.BaseDB
dbConn *WorkerConn

tctx *tcontext.Context
@@ -458,10 +460,11 @@ func (k *ShardingGroupKeeper) Init(conn *WorkerConn) error {
k.dbConn = conn
} else {
k.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
- dbConn, err := createConn(k.tctx, k.cfg, k.cfg.To)
+ db, dbConn, err := createConn(k.tctx, k.cfg, k.cfg.To)
if err != nil {
return err
}
+ k.db = db
k.dbConn = dbConn
}
err := k.prepare()
@@ -699,7 +702,7 @@ func (k *ShardingGroupKeeper) prepare() error {

// Close closes sharding group keeper
func (k *ShardingGroupKeeper) Close() {
- closeConns(k.tctx, k.dbConn)
+
closeBaseDB(k.tctx, k.db) } func (k *ShardingGroupKeeper) createSchema() error { diff --git a/syncer/syncer.go b/syncer/syncer.go index 0d8164a27c..a6538e0442 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/unit" "github.com/pingcap/dm/pkg/binlog" + "github.com/pingcap/dm/pkg/conn" tcontext "github.com/pingcap/dm/pkg/context" fr "github.com/pingcap/dm/pkg/func-rollback" "github.com/pingcap/dm/pkg/gtid" @@ -148,7 +149,9 @@ type Syncer struct { fromDB *UpStreamConn + toDB *conn.BaseDB toDBConns []*WorkerConn + ddlDB *conn.BaseDB ddlDBConn *WorkerConn jobs []chan *job @@ -508,13 +511,13 @@ func (s *Syncer) resetDBs() error { var err error for i := 0; i < len(s.toDBConns); i++ { - err = s.toDBConns[i].ResetConn(s.tctx) + err = s.toDBConns[i].resetConn(s.tctx) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } } - err = s.ddlDBConn.ResetConn(s.tctx) + err = s.ddlDBConn.resetConn(s.tctx) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) } @@ -2023,7 +2026,7 @@ func (s *Syncer) createDBs() error { dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout). AddMaxIdleConns(s.cfg.WorkerCount) - s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) + s.toDB, s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) if err != nil { closeUpstreamConn(s.tctx, s.fromDB) // release resources acquired before return with error return err @@ -2031,21 +2034,21 @@ func (s *Syncer) createDBs() error { // baseConn for ddl dbCfg = s.cfg.To dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout) - s.ddlDBConn, err = createConn(s.tctx, s.cfg, dbCfg) + s.ddlDB, s.ddlDBConn, err = createConn(s.tctx, s.cfg, dbCfg) if err != nil { closeUpstreamConn(s.tctx, s.fromDB) - closeConns(s.tctx, s.toDBConns...) + closeBaseDB(s.tctx, s.toDB) return err } return nil } -// closeConns closes all opened DBs, rollback for createConns +// closeBaseDB closes all opened DBs, rollback for createConns func (s *Syncer) closeDBs() { closeUpstreamConn(s.tctx, s.fromDB) - closeConns(s.tctx, s.toDBConns...) 
- closeConns(s.tctx, s.ddlDBConn) + closeBaseDB(s.tctx, s.toDB) + closeBaseDB(s.tctx, s.ddlDB) } // record skip ddl/dml sqls' position From 2f90c6890b3d16ae157def9460c4b9e586b08366 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 17 Sep 2019 19:05:45 +0800 Subject: [PATCH 07/50] Update loader/checkpoint.go Co-Authored-By: amyangfei --- loader/checkpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loader/checkpoint.go b/loader/checkpoint.go index 94413b270f..4cb27a817e 100644 --- a/loader/checkpoint.go +++ b/loader/checkpoint.go @@ -299,7 +299,7 @@ func (cp *RemoteCheckPoint) Init(filename string, endPos int64) error { func (cp *RemoteCheckPoint) Close() { err := cp.conn.Close() if err != nil { - cp.tctx.L().Error("close checkpoint connection error", log.ShortError(err)) + cp.tctx.L().Error("close checkpoint connection", log.ShortError(err)) } err = cp.db.Close() if err != nil { From 958509107b9457c2f9169e30ca7dccb43310e4e0 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 17 Sep 2019 19:05:59 +0800 Subject: [PATCH 08/50] Update loader/checkpoint.go Co-Authored-By: amyangfei --- loader/checkpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loader/checkpoint.go b/loader/checkpoint.go index 4cb27a817e..f28d308243 100644 --- a/loader/checkpoint.go +++ b/loader/checkpoint.go @@ -303,7 +303,7 @@ func (cp *RemoteCheckPoint) Close() { } err = cp.db.Close() if err != nil { - cp.tctx.L().Error("close checkpoint db error", log.ShortError(err)) + cp.tctx.L().Error("close checkpoint db", log.ShortError(err)) } } From d8e92ccdef43e8c8c43e980eab9a3bec4505e451 Mon Sep 17 00:00:00 2001 From: luancheng Date: Tue, 17 Sep 2019 19:15:43 +0800 Subject: [PATCH 09/50] address comments --- loader/checkpoint.go | 2 +- loader/loader.go | 4 ++-- syncer/db.go | 23 ++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/loader/checkpoint.go b/loader/checkpoint.go index 94413b270f..691b8c4e42 100644 --- a/loader/checkpoint.go +++ b/loader/checkpoint.go @@ -66,7 +66,7 @@ type CheckPoint interface { // RemoteCheckPoint implements CheckPoint by saving status in remote database system, mostly in TiDB. 
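// NOTE: the two handles below have distinct jobs: conn serves the checkpoint
// queries, while db owns the underlying pool and is what Close must release.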
type RemoteCheckPoint struct {
db *conn.BaseDB
- conn *WorkerConn // NOTE: use dbutil in tidb-tools later
+ conn *WorkerConn
id string
schema string
table string
diff --git a/loader/loader.go b/loader/loader.go
index abe8c33798..1f504e7444 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -975,13 +975,13 @@ func fetchMatchedLiteral(ctx *tcontext.Context, router *router.Table, schema, ta
func (l *Loader) restoreData(ctx context.Context) error {
begin := time.Now()

- db, conn, err := createConn(ctx, l.cfg)
+ baseDB, conn, err := createConn(ctx, l.cfg)
if err != nil {
return err
}
defer func() {
conn.baseConn.Close()
- db.Close()
+ baseDB.Close()
}()

dispatchMap := make(map[string]*fileJob)
diff --git a/syncer/db.go b/syncer/db.go
index 19d87fa4f2..96d2705c32 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -116,9 +116,11 @@ func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpSt
}

func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) {
- err := conn.BaseDB.Close()
- if err != nil {
- tctx.L().Error("fail to close baseConn connection", log.ShortError(err))
+ if conn != nil {
+ err := conn.BaseDB.Close()
+ if err != nil {
+ tctx.L().Error("fail to close baseConn connection", log.ShortError(err))
+ }
}
}

@@ -203,11 +205,12 @@ func (conn *WorkerConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError
BackoffStrategy: retry.Stable,
IsRetryableFn: func(retryTime int, err error) bool {
if retry.IsConnectionError(err) {
- err := conn.resetConn(tctx)
+ err = conn.resetConn(tctx)
if err != nil {
- tctx.L().Warn("reset connection failed", zap.Int("retry", retryTime),
+ tctx.L().Error("reset connection failed", zap.Int("retry", retryTime),
zap.String("queries", utils.TruncateInterface(queries, -1)),
- zap.String("arguments", utils.TruncateInterface(args, -1)))
+ zap.String("arguments", utils.TruncateInterface(args, -1)),
+ log.ShortError(err))
return false
}
return true
@@ -291,9 +294,11 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config

// close baseDB to release all connections generated from this baseDB, and the baseDB itself
func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
- err := baseDB.Close()
- if err != nil {
- tctx.L().Error("fail to close baseDB", log.ShortError(err))
+ if baseDB != nil {
+ err := baseDB.Close()
+ if err != nil {
+ tctx.L().Error("fail to close baseDB", log.ShortError(err))
+ }
}
}
From a710aff81ced297c63be4138398a37167e949a63 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Tue, 17 Sep 2019 19:32:57 +0800
Subject: [PATCH 10/50] address comment

---
 syncer/db.go | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/syncer/db.go b/syncer/db.go
index 96d2705c32..545ef612f8 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -142,12 +142,6 @@ func (conn *WorkerConn) resetConn(tctx *tcontext.Context) error {
if err != nil {
return err
}
- if conn.baseConn != nil {
- err = conn.baseConn.Close()
- if err != nil {
- return err
- }
- }
conn.baseConn = dbConn
return nil
}
From ed5c1a99e9504695c4ba84dde8549aede7d58fe3 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 18 Sep 2019 17:38:01 +0800
Subject: [PATCH 11/50] address comments

---
 dm/config/subtask.go | 13 ++++++++-----
 loader/checkpoint.go | 7 ++++---
 loader/db.go | 21 +++++++++++----------
 loader/loader.go | 10 +++++-----
 pkg/conn/baseconn.go | 4 ++--
 pkg/conn/baseconn_test.go | 5 ++---
 pkg/conn/basedb.go | 19 ++++++++++++-------
 pkg/conn/basedb_test.go | 5 +++--
 pkg/retry/errors.go | 2 --
 syncer/checkpoint.go | 7 ++++---
syncer/checkpoint_test.go | 2 +- syncer/db.go | 27 ++++++++++++++------------- syncer/db_test.go | 4 ++-- syncer/online_ddl.go | 2 +- syncer/sharding_group.go | 4 ++-- syncer/syncer.go | 12 ++++++------ syncer/syncer_test.go | 16 ++++++++-------- 17 files changed, 85 insertions(+), 75 deletions(-) diff --git a/dm/config/subtask.go b/dm/config/subtask.go index d7b231f7c9..b0ab4a2088 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -58,15 +58,18 @@ func DefaultRawDBConfig(readTimeout string) *RawDBConfig { } } -// AddWriteTimeout adds writeTimeout for raw database -func (c *RawDBConfig) AddWriteTimeout(writeTimeout string) *RawDBConfig { +// SetWriteTimeout adds writeTimeout for raw database +func (c *RawDBConfig) SetWriteTimeout(writeTimeout string) *RawDBConfig { c.WriteTimeout = writeTimeout return c } -// AddMaxIdleConns set maxIdleConns for raw database -func (c *RawDBConfig) AddMaxIdleConns(conns int) *RawDBConfig { - c.MaxIdleConns = conns +// SetMaxIdleConns set maxIdleConns for raw database +// set value < 0 then do nothing (default 2 idle connections) +// set value == 0 then no idle connection +// set value > 0 then conns idle connections +func (c *RawDBConfig) SetMaxIdleConns(value int) *RawDBConfig { + c.MaxIdleConns = value return c } diff --git a/loader/checkpoint.go b/loader/checkpoint.go index b5805dd3f3..a3b1567777 100644 --- a/loader/checkpoint.go +++ b/loader/checkpoint.go @@ -64,9 +64,10 @@ type CheckPoint interface { } // RemoteCheckPoint implements CheckPoint by saving status in remote database system, mostly in TiDB. +// it's not thread-safe type RemoteCheckPoint struct { db *conn.BaseDB - conn *WorkerConn + conn *DBConn id string schema string table string @@ -76,7 +77,7 @@ type RemoteCheckPoint struct { } func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id string) (CheckPoint, error) { - db, conn, err := createConn(tctx.Context(), cfg) + db, dbConn, err := createConn(tctx.Context(), cfg) if err != nil { return nil, err } @@ -85,7 +86,7 @@ func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s cp := &RemoteCheckPoint{ db: db, - conn: conn, + conn: dbConn, id: id, restoringFiles: make(map[string]map[string]FilePosSet), finishedTables: make(map[string]struct{}), diff --git a/loader/db.go b/loader/db.go index f8c4af09b8..c8f917324e 100644 --- a/loader/db.go +++ b/loader/db.go @@ -35,13 +35,14 @@ import ( "github.com/pingcap/dm/pkg/utils" ) -// WorkerConn represents a live DB connection -type WorkerConn struct { +// DBConn represents a live DB connection +// it's not thread-safe +type DBConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn } -func (conn *WorkerConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { +func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { if conn == nil || conn.baseConn == nil { return nil, terror.ErrDBUnExpect.Generate("database connection not valid") } @@ -90,7 +91,7 @@ func (conn *WorkerConn) querySQL(ctx *tcontext.Context, query string, args ...in return ret.(*sql.Rows), nil } -func (conn *WorkerConn) executeSQL(ctx *tcontext.Context, queries []string, args ...[]interface{}) error { +func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...[]interface{}) error { if len(queries) == 0 { return nil } @@ -150,14 +151,14 @@ func (conn *WorkerConn) executeSQL(ctx *tcontext.Context, queries []string, args } // Close release db connection resource, return 
it to BaseDB.db connection pool -func (conn *WorkerConn) Close() error { +func (conn *DBConn) Close() error { if conn == nil || conn.baseConn == nil { return nil } return conn.baseConn.Close() } -func createConn(ctx context.Context, cfg *config.SubTaskConfig) (*conn.BaseDB, *WorkerConn, error) { +func createConn(ctx context.Context, cfg *config.SubTaskConfig) (*conn.BaseDB, *DBConn, error) { baseDB, err := conn.DefaultDBProvider.Apply(cfg.To) if err != nil { return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) @@ -166,15 +167,15 @@ func createConn(ctx context.Context, cfg *config.SubTaskConfig) (*conn.BaseDB, * if err != nil { return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) } - return baseDB, &WorkerConn{baseConn: baseConn, cfg: cfg}, nil + return baseDB, &DBConn{baseConn: baseConn, cfg: cfg}, nil } -func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*WorkerConn, error) { +func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) { baseDB, err := conn.DefaultDBProvider.Apply(cfg.To) if err != nil { return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) } - conns := make([]*WorkerConn, 0, workerCount) + conns := make([]*DBConn, 0, workerCount) for i := 0; i < workerCount; i++ { baseConn, err := baseDB.GetBaseConn(tctx.Context()) if err != nil { @@ -184,7 +185,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount } return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream) } - conns = append(conns, &WorkerConn{baseConn: baseConn, cfg: cfg}) + conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg}) } return baseDB, conns, nil } diff --git a/loader/loader.go b/loader/loader.go index 1f504e7444..8a7ccb2e35 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -77,7 +77,7 @@ type Worker struct { id int cfg *config.SubTaskConfig checkPoint CheckPoint - conn *WorkerConn + conn *DBConn wg sync.WaitGroup jobQueue chan *dataJob loader *Loader @@ -338,7 +338,7 @@ type Loader struct { closed sync2.AtomicBool toDB *conn.BaseDB - toDBConns []*WorkerConn + toDBConns []*DBConn totalDataSize sync2.AtomicInt64 totalFileCount sync2.AtomicInt64 // schema + table + data @@ -862,7 +862,7 @@ func (l *Loader) prepare() error { } // restoreSchema creates schema -func (l *Loader) restoreSchema(conn *WorkerConn, sqlFile, schema string) error { +func (l *Loader) restoreSchema(conn *DBConn, sqlFile, schema string) error { err := l.restoreStructure(conn, sqlFile, schema, "") if err != nil { if isErrDBExists(err) { @@ -875,7 +875,7 @@ func (l *Loader) restoreSchema(conn *WorkerConn, sqlFile, schema string) error { } // restoreTable creates table -func (l *Loader) restoreTable(conn *WorkerConn, sqlFile, schema, table string) error { +func (l *Loader) restoreTable(conn *DBConn, sqlFile, schema, table string) error { err := l.restoreStructure(conn, sqlFile, schema, table) if err != nil { if isErrTableExists(err) { @@ -888,7 +888,7 @@ func (l *Loader) restoreTable(conn *WorkerConn, sqlFile, schema, table string) e } // restoreStruture creates schema or table -func (l *Loader) restoreStructure(conn *WorkerConn, sqlFile string, schema string, table string) error { +func (l *Loader) restoreStructure(conn *DBConn, sqlFile string, schema string, table string) error { 
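// NOTE: restoreSchema and restoreTable above both funnel into this function;
// they treat isErrDBExists / isErrTableExists as success, so re-running a
// partially finished load stays idempotent at the structure level.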
f, err := os.Open(sqlFile) if err != nil { return terror.ErrLoadUnitReadSchemaFile.Delegate(err) diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index 1cbfd2c780..4ba031390e 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -33,11 +33,11 @@ type BaseConn struct { } // newBaseConn builds BaseConn to connect real DB -func newBaseConn(conn *sql.Conn, strategy retry.Strategy) (*BaseConn, error) { +func newBaseConn(conn *sql.Conn, strategy retry.Strategy) *BaseConn { if strategy == nil { strategy = &retry.FiniteRetryStrategy{} } - return &BaseConn{conn, strategy}, nil + return &BaseConn{conn, strategy} } // SetRetryStrategy set retry strategy for baseConn diff --git a/pkg/conn/baseconn_test.go b/pkg/conn/baseconn_test.go index dd57c51e06..4da2347c02 100644 --- a/pkg/conn/baseconn_test.go +++ b/pkg/conn/baseconn_test.go @@ -35,11 +35,10 @@ type testBaseConnSuite struct { } func (t *testBaseConnSuite) TestBaseConn(c *C) { - baseConn, err := newBaseConn(nil, nil) - c.Assert(err, IsNil) + baseConn := newBaseConn(nil, nil) tctx := tcontext.Background() - err = baseConn.SetRetryStrategy(nil) + err := baseConn.SetRetryStrategy(nil) c.Assert(err, IsNil) _, err = baseConn.QuerySQL(tctx, "select 1") diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go index 5cfdabe60a..ad52710089 100644 --- a/pkg/conn/basedb.go +++ b/pkg/conn/basedb.go @@ -38,11 +38,12 @@ func init() { DefaultDBProvider = &defaultDBProvider{} } +// Apply will build BaseDB with DBConfig func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) { dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4&interpolateParams=true&maxAllowedPacket=%d", config.User, config.Password, config.Host, config.Port, *config.MaxAllowedPacket) - maxIdleConn := 0 + maxIdleConn := -1 rawCfg := config.RawDBCfg if rawCfg != nil { if rawCfg.ReadTimeout != "" { @@ -54,14 +55,13 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) { maxIdleConn = rawCfg.MaxIdleConns } db, err := sql.Open("mysql", dsn) - - if maxIdleConn > 0 { - db.SetMaxIdleConns(maxIdleConn) - } - if err != nil { return nil, terror.ErrDBDriverError.Delegate(err) } + if maxIdleConn >= 0 { + db.SetMaxIdleConns(maxIdleConn) + } + return &BaseDB{db, &retry.FiniteRetryStrategy{}}, nil } @@ -78,7 +78,12 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) { if err != nil { return nil, terror.ErrDBDriverError.Delegate(err) } - return newBaseConn(conn, d.Retry) + err = conn.PingContext(ctx) + if err != nil { + return nil, terror.ErrDBDriverError.Delegate(err) + } + + return newBaseConn(conn, d.Retry), nil } // Close release db resource diff --git a/pkg/conn/basedb_test.go b/pkg/conn/basedb_test.go index f9e8c78730..9a260e70bf 100644 --- a/pkg/conn/basedb_test.go +++ b/pkg/conn/basedb_test.go @@ -16,6 +16,7 @@ package conn import ( "github.com/DATA-DOG/go-sqlmock" . 
"github.com/pingcap/check" + tcontext "github.com/pingcap/dm/pkg/context" "github.com/pingcap/dm/pkg/retry" ) @@ -25,7 +26,7 @@ var _ = Suite(&testBaseDBSuite{}) type testBaseDBSuite struct { } -func (t *testBaseDBSuite) TestBaseDB(c *C) { +func (t *testBaseDBSuite) TestGetBaseConn(c *C) { db, mock, err := sqlmock.New() c.Assert(err, IsNil) @@ -47,7 +48,7 @@ func (t *testBaseDBSuite) TestBaseDB(c *C) { c.Assert(err, IsNil) ids = append(ids, id) } - c.Assert(len(ids), Equals, 1) + c.Assert(ids, HasLen, 1) c.Assert(ids[0], Equals, 1) mock.ExpectBegin() diff --git a/pkg/retry/errors.go b/pkg/retry/errors.go index dae5919b62..558352fea3 100644 --- a/pkg/retry/errors.go +++ b/pkg/retry/errors.go @@ -43,8 +43,6 @@ func IsConnectionError(err error) bool { switch err { case driver.ErrBadConn: return true - case mysql.ErrInvalidConn: - return true } return false } diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go index a1aa79ad50..9d09ec1959 100644 --- a/syncer/checkpoint.go +++ b/syncer/checkpoint.go @@ -123,7 +123,7 @@ func (b *binlogPoint) String() string { // because, when restarting to continue the sync, all sharding DDLs must try-sync again type CheckPoint interface { // Init initializes the CheckPoint - Init(conn *WorkerConn) error + Init(conn *DBConn) error // Close closes the CheckPoint Close() @@ -182,13 +182,14 @@ type CheckPoint interface { // RemoteCheckPoint implements CheckPoint // which using target database to store info // NOTE: now we sync from relay log, so not add GTID support yet +// it's not thread-safe type RemoteCheckPoint struct { sync.RWMutex cfg *config.SubTaskConfig db *conn.BaseDB - dbConn *WorkerConn + dbConn *DBConn schema string // schema name, set through task config table string // table name, now it's task name id string // checkpoint ID, now it is `source-id` @@ -228,7 +229,7 @@ func NewRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s } // Init implements CheckPoint.Init -func (cp *RemoteCheckPoint) Init(conn *WorkerConn) error { +func (cp *RemoteCheckPoint) Init(conn *DBConn) error { if conn != nil { cp.dbConn = conn } else { diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go index 93ad117552..f55b85c1ff 100644 --- a/syncer/checkpoint_test.go +++ b/syncer/checkpoint_test.go @@ -92,7 +92,7 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) { dbConn, err := db.Conn(tcontext.Background().Context()) c.Assert(err, IsNil) - conn := &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + conn := &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} err = cp.Init(conn) c.Assert(err, IsNil) diff --git a/syncer/db.go b/syncer/db.go index 545ef612f8..dde0bd3071 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -124,8 +124,9 @@ func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) { } } -// WorkerConn represents a live DB connection -type WorkerConn struct { +// DBConn represents a live DB connection +// it's not thread-safe +type DBConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn @@ -134,7 +135,7 @@ type WorkerConn struct { } // ResetConn reset one worker connection from specify *BaseDB -func (conn *WorkerConn) resetConn(tctx *tcontext.Context) error { +func (conn *DBConn) resetConn(tctx *tcontext.Context) error { if conn == nil { return terror.ErrDBDriverError.Generate("database not valid") } @@ -146,7 +147,7 @@ func (conn *WorkerConn) resetConn(tctx *tcontext.Context) error { return nil } -func (conn *WorkerConn) querySQL(tctx 
*tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { +func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { if conn == nil || conn.baseConn == nil { return nil, terror.ErrDBUnExpect.Generate("database base connection not valid") } @@ -184,7 +185,7 @@ func (conn *WorkerConn) querySQL(tctx *tcontext.Context, query string, args ...i return ret.(*sql.Rows), nil } -func (conn *WorkerConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func(error) bool, queries []string, args ...[]interface{}) (int, error) { +func (conn *DBConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError func(error) bool, queries []string, args ...[]interface{}) (int, error) { if len(queries) == 0 { return 0, nil } @@ -247,11 +248,11 @@ func (conn *WorkerConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError return ret.(int), nil } -func (conn *WorkerConn) executeSQL(tctx *tcontext.Context, queries []string, args ...[]interface{}) (int, error) { +func (conn *DBConn) executeSQL(tctx *tcontext.Context, queries []string, args ...[]interface{}) (int, error) { return conn.executeSQLWithIgnore(tctx, nil, queries, args...) } -func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*conn.BaseDB, *WorkerConn, error) { +func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*conn.BaseDB, *DBConn, error) { baseDB, err := createBaseDB(dbCfg) if err != nil { return nil, nil, err @@ -263,11 +264,11 @@ func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config. resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) { return baseDB.GetBaseConn(tctx.Context()) } - return baseDB, &WorkerConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}, nil + return baseDB, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}, nil } -func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) (*conn.BaseDB, []*WorkerConn, error) { - conns := make([]*WorkerConn, 0, count) +func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) (*conn.BaseDB, []*DBConn, error) { + conns := make([]*DBConn, 0, count) db, err := createBaseDB(dbCfg) if err != nil { return nil, nil, err @@ -281,7 +282,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) { return db.GetBaseConn(tctx.Context()) } - conns = append(conns, &WorkerConn{baseConn: dbConn, cfg: cfg, resetConnFn: resetConnFn}) + conns = append(conns, &DBConn{baseConn: dbConn, cfg: cfg, resetConnFn: resetConnFn}) } return db, conns, nil } @@ -296,7 +297,7 @@ func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) { } } -func getTableIndex(tctx *tcontext.Context, conn *WorkerConn, table *table) error { +func getTableIndex(tctx *tcontext.Context, conn *DBConn, table *table) error { if table.schema == "" || table.name == "" { return terror.ErrDBUnExpect.Generate("schema/table is empty") } @@ -353,7 +354,7 @@ func getTableIndex(tctx *tcontext.Context, conn *WorkerConn, table *table) error return nil } -func getTableColumns(tctx *tcontext.Context, conn *WorkerConn, table *table) error { +func getTableColumns(tctx *tcontext.Context, conn *DBConn, table *table) error { if table.schema == "" || table.name == "" { return terror.ErrDBUnExpect.Generate("schema/table is empty") } diff --git a/syncer/db_test.go 
b/syncer/db_test.go index 8f0222be62..6e6ab7e042 100644 --- a/syncer/db_test.go +++ b/syncer/db_test.go @@ -148,7 +148,7 @@ func (s *testSyncerSuite) TestExecuteSQLSWithIgnore(c *C) { c.Assert(err, IsNil) dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) - conn := &WorkerConn{ + conn := &DBConn{ baseConn: &conn.BaseConn{ DBConn: dbConn, RetryStrategy: &retry.FiniteRetryStrategy{}, @@ -241,7 +241,7 @@ func (s *testDBSuite) TestTimezone(c *C) { s.resetBinlogSyncer(c) // we should not use `sql.DB.Exec` to do query which depends on session variables - // because `sql.DB.Exec` will choose a underlying WorkerConn for every query from the connection pool + // because `sql.DB.Exec` will choose a underlying DBConn for every query from the connection pool // and different Conn using different session // ref: `sql.DB.Conn` // and `set @@global` is also not reasonable, because it can not affect sessions already exist diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go index b89483fa3f..e00b9e7ccb 100644 --- a/syncer/online_ddl.go +++ b/syncer/online_ddl.go @@ -81,7 +81,7 @@ type OnlineDDLStorage struct { cfg *config.SubTaskConfig db *conn.BaseDB - dbConn *WorkerConn + dbConn *DBConn schema string // schema name, set through task config table string // table name, now it's task name id string // now it is `server-id` used as MySQL slave diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go index daf1720ae6..951672d86c 100644 --- a/syncer/sharding_group.go +++ b/syncer/sharding_group.go @@ -404,7 +404,7 @@ type ShardingGroupKeeper struct { shardMetaTable string db *conn.BaseDB - dbConn *WorkerConn + dbConn *DBConn tctx *tcontext.Context } @@ -454,7 +454,7 @@ func (k *ShardingGroupKeeper) AddGroup(targetSchema, targetTable string, sourceI } // Init does initialization staff -func (k *ShardingGroupKeeper) Init(conn *WorkerConn) error { +func (k *ShardingGroupKeeper) Init(conn *DBConn) error { k.clear() if conn != nil { k.dbConn = conn diff --git a/syncer/syncer.go b/syncer/syncer.go index a6538e0442..f316623c9e 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -150,9 +150,9 @@ type Syncer struct { fromDB *UpStreamConn toDB *conn.BaseDB - toDBConns []*WorkerConn + toDBConns []*DBConn ddlDB *conn.BaseDB - ddlDBConn *WorkerConn + ddlDBConn *DBConn jobs []chan *job jobsClosed sync2.AtomicBool @@ -405,7 +405,7 @@ func (s *Syncer) Init() (err error) { // initShardingGroups initializes sharding groups according to source MySQL, filter rules and router rules // NOTE: now we don't support modify router rules after task has started -func (s *Syncer) initShardingGroups(conn *WorkerConn) error { +func (s *Syncer) initShardingGroups(conn *DBConn) error { // fetch tables from source and filter them sourceTables, err := s.fromDB.fetchAllDoTables(s.bwList) if err != nil { @@ -635,7 +635,7 @@ func (s *Syncer) clearAllTables() { s.genColsCache.reset() } -func (s *Syncer) getTableFromDB(db *WorkerConn, schema string, name string) (*table, error) { +func (s *Syncer) getTableFromDB(db *DBConn, schema string, name string) (*table, error) { table := &table{} table.schema = schema table.name = name @@ -855,7 +855,7 @@ func (s *Syncer) flushCheckPoints() error { return nil } -func (s *Syncer) sync(ctx *tcontext.Context, queueBucket string, db *WorkerConn, jobChan chan *job) { +func (s *Syncer) sync(ctx *tcontext.Context, queueBucket string, db *DBConn, jobChan chan *job) { defer s.wg.Done() idx := 0 @@ -2024,7 +2024,7 @@ func (s *Syncer) createDBs() error { dbCfg = s.cfg.To dbCfg.RawDBCfg = 
config.DefaultRawDBConfig(maxDMLConnectionTimeout). - AddMaxIdleConns(s.cfg.WorkerCount) + SetMaxIdleConns(s.cfg.WorkerCount) s.toDB, s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) if err != nil { diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index 227b9e44a9..1127dd6fdd 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -938,7 +938,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) syncer.fromDB = &UpStreamConn{BaseDB: &conn.BaseDB{db, nil}} - syncer.toDBConns = []*WorkerConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} + syncer.toDBConns = []*DBConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} syncer.reset() streamer, err := syncer.streamerProducer.generateStreamer(pos) @@ -1222,8 +1222,8 @@ func (s *testSyncerSuite) TestSharding(c *C) { syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{fromDB, nil}} dbConn, err := db.Conn(ctx) c.Assert(err, IsNil) - syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} - syncer.ddlDBConn = &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} // mock syncer.Init() function, because we need to pass mock dbs to different members' init syncer.genRouter() @@ -1231,8 +1231,8 @@ func (s *testSyncerSuite) TestSharding(c *C) { c.Assert(err, IsNil) checkPointDBConn, err := checkPointDB.Conn(ctx) c.Assert(err, IsNil) - syncer.initShardingGroups(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}}) - syncer.checkpoint.Init(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}) + syncer.initShardingGroups(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}}) + syncer.checkpoint.Init(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}) syncer.reset() events := append(createEvents, s.generateEvents(_case.testEvents, c)...) 
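// NOTE: the wiring above is the pattern this series converges on for tests:
// take a *sql.Conn off a sqlmock-backed *sql.DB and wrap it, e.g.
//   sqlConn, err := db.Conn(ctx) // sqlConn names the raw *sql.Conn
//   &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{sqlConn, &retry.FiniteRetryStrategy{}}}
// so tests exercise the same retry path as production connections.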
syncer.streamerProducer = &MockStreamProducer{events}
@@ -1374,9 +1374,9 @@ func (s *testSyncerSuite) TestRun(c *C) {
syncer := NewSyncer(s.cfg)
syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{db, nil}}
- syncer.toDBConns = []*WorkerConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}},
+ syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}},
{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
- syncer.ddlDBConn = &WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}
+ syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}
c.Assert(syncer.Type(), Equals, pb.UnitType_Sync)

syncer.columnMapping, err = cm.NewMapping(s.cfg.CaseSensitive, s.cfg.ColumnMappingRules)
@@ -1390,7 +1390,7 @@ func (s *testSyncerSuite) TestRun(c *C) {
checkPointMock.ExpectExec(fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s_syncer_checkpoint`", s.cfg.MetaSchema, s.cfg.Name)).WillReturnResult(sqlmock.NewResult(1, 1))
checkPointMock.ExpectCommit()

- syncer.checkpoint.Init(&WorkerConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}})
+ syncer.checkpoint.Init(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}})
syncer.reset()
events1 := mockBinlogEvents{
mockBinlogEvent{typ: DBCreate, args: []interface{}{"test_1"}},
From da360075330f7735d3da19df1a0de81eb9c74fd3 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 18 Sep 2019 18:23:25 +0800
Subject: [PATCH 12/50] address comments

---
 loader/db.go | 6 +++---
 loader/loader.go | 18 ++++++++++++------
 pkg/retry/strategy_test.go | 4 ++--
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/loader/db.go b/loader/db.go
index c8f917324e..d224762a6c 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -179,9 +179,9 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount
for i := 0; i < workerCount; i++ {
baseConn, err := baseDB.GetBaseConn(tctx.Context())
if err != nil {
- err := baseDB.Close()
- if err != nil {
- tctx.L().Error("failed to close baseDB")
+ terr := baseDB.Close()
+ if terr != nil {
+ tctx.L().Error("failed to close baseDB", zap.Error(terr))
}
return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
}
diff --git a/loader/loader.go b/loader/loader.go
index 8a7ccb2e35..ee59bed4ab 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -110,7 +110,6 @@ func (w *Worker) Close() {

close(w.jobQueue)
w.wg.Wait()
- w.conn.Close()
}

func (w *Worker) run(ctx context.Context, fileJobQueue chan *fileJob, workerWg *sync.WaitGroup, runFatalChan chan *pb.ProcessError) {
@@ -559,9 +558,16 @@ func (l *Loader) Close() {
}
l.stopLoad()
+
+ for _, c := range l.toDBConns {
+ err := c.Close()
+ if err != nil {
+ l.tctx.L().Error("close downstream connection error", log.ShortError(err))
+ }
+ }
err := l.toDB.Close()
if err != nil {
- l.tctx.L().Error("close toDB error", log.ShortError(err))
+ l.tctx.L().Error("close downstream DB error", log.ShortError(err))
}
l.checkPoint.Close()
l.closed.Set(true)
@@ -975,12 +981,12 @@ func fetchMatchedLiteral(ctx *tcontext.Context, router *router.Table, schema, ta
func (l *Loader) restoreData(ctx context.Context) error {
begin := time.Now()

- baseDB, conn, err := createConn(ctx, l.cfg)
+ baseDB, dbConn, err := createConn(ctx, l.cfg)
if err != nil {
return err
}
defer func() {
- conn.baseConn.Close()
+ dbConn.Close()
baseDB.Close()
}()

dispatchMap := make(map[string]*fileJob)
diff --git a/pkg/retry/strategy_test.go b/pkg/retry/strategy_test.go
index 466fe1ba07..18ee8b43d2 100644
--- a/pkg/retry/strategy_test.go
+++ b/pkg/retry/strategy_test.go
@@ -14,13 +14,13 @@
package retry

import (
+ "database/sql/driver"
"testing"
"time"

tcontext "github.com/pingcap/dm/pkg/context"
"github.com/pingcap/dm/pkg/terror"

- "github.com/go-sql-driver/mysql"
. "github.com/pingcap/check"
)

@@ -72,7 +72,7 @@ func (t *testStrategySuite) TestFiniteRetryStrategy(c *C) {
return IsRetryableError(err)
}
operateFn = func(*tcontext.Context) (interface{}, error) {
- mysqlErr := mysql.ErrInvalidConn
+ mysqlErr := driver.ErrBadConn
return nil, terror.ErrDBInvalidConn.Delegate(mysqlErr, "test invalid connection")
}
_, opCount, err = strategy.Apply(ctx, params, operateFn)
From 8035a0c09e2ee495791e9b244d55318190b8b6ff Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 18 Sep 2019 19:19:33 +0800
Subject: [PATCH 13/50] address comment

---
 dm/config/subtask.go | 4 +++-
 pkg/conn/basedb.go | 8 +++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/dm/config/subtask.go b/dm/config/subtask.go
index b0ab4a2088..369facbf47 100644
--- a/dm/config/subtask.go
+++ b/dm/config/subtask.go
@@ -42,6 +42,7 @@ const (

var (
defaultMaxAllowedPacket = 64 * 1024 * 1024 // 64MiB, equal to TiDB's default
+ defaultMaxIdleConns = 2
)

// RawDBConfig contains some low level database config
@@ -54,7 +55,8 @@ type RawDBConfig struct {
}

// DefaultRawDBConfig returns a default raw database config
func DefaultRawDBConfig(readTimeout string) *RawDBConfig {
return &RawDBConfig{
- ReadTimeout: readTimeout,
+ ReadTimeout: readTimeout,
+ MaxIdleConns: defaultMaxIdleConns,
}
}

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index ad52710089..a30d057ac2 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -43,7 +43,7 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/?charset=utf8mb4&interpolateParams=true&maxAllowedPacket=%d",
config.User, config.Password, config.Host, config.Port, *config.MaxAllowedPacket)

- maxIdleConn := -1
+ var maxIdleConns int
rawCfg := config.RawDBCfg
if rawCfg != nil {
if rawCfg.ReadTimeout != "" {
@@ -52,15 +52,13 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
if rawCfg.WriteTimeout != "" {
dsn += fmt.Sprintf("&writeTimeout=%s", rawCfg.WriteTimeout)
}
- maxIdleConn = rawCfg.MaxIdleConns
+ maxIdleConns = rawCfg.MaxIdleConns
}
db, err := sql.Open("mysql", dsn)
if err != nil {
return nil, terror.ErrDBDriverError.Delegate(err)
}
- if maxIdleConn >= 0 {
- db.SetMaxIdleConns(maxIdleConn)
- }
+ db.SetMaxIdleConns(maxIdleConns)

return &BaseDB{db, &retry.FiniteRetryStrategy{}}, nil
}
From 8266bf0748699583ae5e85d348235dd44cfb222c Mon Sep 17 00:00:00 2001
From: luancheng
Date: Mon, 23 Sep 2019 10:42:14 +0800
Subject: [PATCH 14/50] address comment

---
 dm/config/subtask.go | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dm/config/subtask.go b/dm/config/subtask.go
index 369facbf47..fb8eef3e50 100644
--- a/dm/config/subtask.go
+++ b/dm/config/subtask.go
@@ -67,9 +67,8 @@ func (c *RawDBConfig) SetWriteTimeout(writeTimeout string) *RawDBConfig {
}

// SetMaxIdleConns set maxIdleConns for raw database
-// set value < 0 then do nothing (default 2 idle connections)
-// set value == 0 then no idle connection
-// set value > 0 then conns idle connections
+// set value <= 0 then no idle connections are retained.
+// set value > 0 then `value` idle connections are retained.
func (c *RawDBConfig) SetMaxIdleConns(value int) *RawDBConfig {
c.MaxIdleConns = value
return c
From 9e031c405776bf4ac10c2e965df587726a9015f3 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Mon, 23 Sep 2019 12:32:06 +0800
Subject: [PATCH 15/50] baseDB closes conns generated from itself

---
 loader/checkpoint.go | 6 +-----
 loader/loader.go | 11 +----------
 pkg/conn/basedb.go | 29 +++++++++++++++++++++++++----
 pkg/conn/basedb_test.go | 3 +--
 syncer/syncer_test.go | 6 +++---
 5 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/loader/checkpoint.go b/loader/checkpoint.go
index a3b1567777..0ada5b25c9 100644
--- a/loader/checkpoint.go
+++ b/loader/checkpoint.go
@@ -298,11 +298,7 @@ func (cp *RemoteCheckPoint) Init(filename string, endPos int64) error {

// Close implements CheckPoint.Close
func (cp *RemoteCheckPoint) Close() {
- err := cp.conn.Close()
- if err != nil {
- cp.tctx.L().Error("close checkpoint connection", log.ShortError(err))
- }
- err = cp.db.Close()
+ err := cp.db.Close()
if err != nil {
cp.tctx.L().Error("close checkpoint db", log.ShortError(err))
}
diff --git a/loader/loader.go b/loader/loader.go
index ee59bed4ab..429f17fdcd 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -559,12 +559,6 @@ func (l *Loader) Close() {
}
l.stopLoad()

- for _, c := range l.toDBConns {
- err := c.Close()
- if err != nil {
- l.tctx.L().Error("close downstream connection error", log.ShortError(err))
- }
- }
err := l.toDB.Close()
if err != nil {
l.tctx.L().Error("close downstream DB error", log.ShortError(err))
@@ -985,10 +979,7 @@ func (l *Loader) restoreData(ctx context.Context) error {
if err != nil {
return err
}
- defer func() {
- dbConn.Close()
- baseDB.Close()
- }()
+ defer baseDB.Close()

dispatchMap := make(map[string]*fileJob)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index a30d057ac2..b784259a6c 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -60,16 +60,25 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
}
db.SetMaxIdleConns(maxIdleConns)

- return &BaseDB{db, &retry.FiniteRetryStrategy{}}, nil
+ return NewBaseDB(db), nil
}

// BaseDB wraps *sql.DB
type BaseDB struct {
DB *sql.DB

+ // hold all db connections generated from this BaseDB
+ conns []*BaseConn
+
Retry retry.Strategy
}

+// NewBaseDB returns *BaseDB object
+func NewBaseDB(db *sql.DB) *BaseDB {
+ conns := make([]*BaseConn, 0)
+ return &BaseDB{db, conns, &retry.FiniteRetryStrategy{}}
+}
+
// GetBaseConn retrieves *BaseConn which has own retryStrategy
func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
conn, err := d.DB.Conn(ctx)
@@ -80,8 +89,9 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
if err != nil {
return nil, terror.ErrDBDriverError.Delegate(err)
}
-
- return newBaseConn(conn, d.Retry), nil
+ baseConn := newBaseConn(conn, d.Retry)
+	d.conns = append(d.conns, baseConn)
+	return baseConn, nil
 }
 
 // Close release db resource
@@ -89,5 +99,16 @@ func (d *BaseDB) Close() error {
 	if d == nil || d.DB == nil {
 		return nil
 	}
-	return d.DB.Close()
+	var err error
+	for _, conn := range d.conns {
+		terr := conn.Close()
+		if err == nil {
+			err = terr
+		}
+	}
+	terr := d.DB.Close()
+	if err == nil {
+		return terr
+	}
+	return err
 }
diff --git a/pkg/conn/basedb_test.go b/pkg/conn/basedb_test.go
index 9a260e70bf..1674609efa 100644
--- a/pkg/conn/basedb_test.go
+++ b/pkg/conn/basedb_test.go
@@ -18,7 +18,6 @@ import (
 	. "github.com/pingcap/check"
 
 	tcontext "github.com/pingcap/dm/pkg/context"
-	"github.com/pingcap/dm/pkg/retry"
 )
 
 var _ = Suite(&testBaseDBSuite{})
@@ -30,7 +29,7 @@ func (t *testBaseDBSuite) TestGetBaseConn(c *C) {
 	db, mock, err := sqlmock.New()
 	c.Assert(err, IsNil)
 
-	baseDB := &BaseDB{db, &retry.FiniteRetryStrategy{}}
+	baseDB := NewBaseDB(db)
 
 	tctx := tcontext.Background()
 
diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go
index 1127dd6fdd..a3bf27901d 100644
--- a/syncer/syncer_test.go
+++ b/syncer/syncer_test.go
@@ -937,7 +937,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) {
 	// use upstream dbConn as mock downstream
 	dbConn, err := db.Conn(context.Background())
 	c.Assert(err, IsNil)
-	syncer.fromDB = &UpStreamConn{BaseDB: &conn.BaseDB{db, nil}}
+	syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)}
 	syncer.toDBConns = []*DBConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
 	syncer.reset()
 
@@ -1219,7 +1219,7 @@ func (s *testSyncerSuite) TestSharding(c *C) {
 		ctx := context.Background()
 		// fromDB mocks upstream dbConn, dbConn mocks downstream dbConn
-		syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{fromDB, nil}}
+		syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: conn.NewBaseDB(fromDB)}
 		dbConn, err := db.Conn(ctx)
 		c.Assert(err, IsNil)
 		syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
@@ -1373,7 +1373,7 @@ func (s *testSyncerSuite) TestRun(c *C) {
 	s.cfg.DisableCausality = false
 	syncer := NewSyncer(s.cfg)
-	syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: &conn.BaseDB{db, nil}}
+	syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: conn.NewBaseDB(db)}
 	syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}},
 		{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
 	syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}

From 6e65f70e2ccc25b1aa6a33badb12f6341c27cdb7 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 25 Sep 2019 10:40:00 +0800
Subject: [PATCH 16/50] address comment

---
 pkg/conn/basedb.go       | 10 +++++++++-
 syncer/checkpoint.go     |  5 +++--
 syncer/db.go             | 31 +++++++++++++++++--------------
 syncer/online_ddl.go     |  5 +++--
 syncer/sharding_group.go |  5 +++--
 5 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index b784259a6c..60f2f97676 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -17,6 +17,7 @@ import (
 	"context"
 	"database/sql"
 	"fmt"
+	"sync"
 
 	"github.com/pingcap/dm/dm/config"
 	"github.com/pingcap/dm/pkg/retry"
@@ -67,6 +68,8 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
 type BaseDB struct {
 	DB *sql.DB
 
+	lock *sync.Mutex
+
 	// hold all db connections generated from this BaseDB
 	conns []*BaseConn
 
@@ -76,11 +79,14 @@ type BaseDB struct {
 // NewBaseDB returns *BaseDB object
 func NewBaseDB(db *sql.DB) *BaseDB {
 	conns := make([]*BaseConn, 0)
-	return &BaseDB{db, conns, &retry.FiniteRetryStrategy{}}
+	lock := new(sync.Mutex)
+	return &BaseDB{db, lock, conns, &retry.FiniteRetryStrategy{}}
 }
 
 // GetBaseConn retrieves *BaseConn which has own retryStrategy
 func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
+	d.lock.Lock()
+	defer d.lock.Unlock()
 	conn, err := d.DB.Conn(ctx)
 	if err != nil {
 		return nil, terror.ErrDBDriverError.Delegate(err)
@@ -100,6 +106,8 @@ func (d *BaseDB) Close() error {
 		return nil
 	}
 	var err error
+	d.lock.Lock()
+	defer d.lock.Unlock()
 	for _, conn := range d.conns {
 		terr := conn.Close()
 		if err == nil {
 			err = terr
 		}
diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go
index 9d09ec1959..56fcfa2616 100644
--- a/syncer/checkpoint.go
+++ b/syncer/checkpoint.go
@@ -233,8 +233,9 @@ func (cp *RemoteCheckPoint) Init(conn *DBConn) error {
 	if conn != nil {
 		cp.dbConn = conn
 	} else {
-		cp.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
-		db, dbConn, err := createConn(cp.tctx, cp.cfg, cp.cfg.To)
+		checkPointDB := cp.cfg.To
+		checkPointDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
+		db, dbConn, err := createConn(cp.tctx, cp.cfg, checkPointDB)
 		if err != nil {
 			return err
 		}
diff --git a/syncer/db.go b/syncer/db.go
index dde0bd3071..0bbae1d96e 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -77,6 +77,16 @@ func createBaseDB(dbCfg config.DBConfig) (*conn.BaseDB, error) {
 	return db, nil
 }
 
+// close baseDB to release all connections generated by this baseDB, and the baseDB itself
+func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
+	if baseDB != nil {
+		err := baseDB.Close()
+		if err != nil {
+			tctx.L().Error("fail to close baseDB", log.ShortError(err))
+		}
+	}
+}
+
 // UpStreamConn connect to upstream DB, use *sql.DB instead of *sql.Conn
 type UpStreamConn struct {
 	cfg    *config.SubTaskConfig
@@ -117,10 +127,7 @@ func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpSt
 
 func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) {
 	if conn != nil {
-		err := conn.BaseDB.Close()
-		if err != nil {
-			tctx.L().Error("fail to close baseConn connection", log.ShortError(err))
-		}
+		closeBaseDB(tctx, conn.BaseDB)
 	}
 }
 
@@ -143,6 +150,12 @@ func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
 	if err != nil {
 		return err
 	}
+	if conn.baseConn != nil {
+		err := conn.baseConn.Close()
+		if err != nil {
+			tctx.L().Warn("close connection error", log.ShortError(err))
+		}
+	}
 	conn.baseConn = dbConn
 	return nil
 }
@@ -287,16 +300,6 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config
 	return db, conns, nil
 }
 
-// close baseDB to release all connection generated by this baseDB and this baseDB
-func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
-	if baseDB != nil {
-		err := baseDB.Close()
-		if err != nil {
-			tctx.L().Error("fail to close baseDB", log.ShortError(err))
-		}
-	}
-}
-
 func getTableIndex(tctx *tcontext.Context, conn *DBConn, table *table) error {
 	if table.schema == "" || table.name == "" {
 		return terror.ErrDBUnExpect.Generate("schema/table is empty")
 	}
diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go
index e00b9e7ccb..16c9b60e5a 100644
--- a/syncer/online_ddl.go
+++ b/syncer/online_ddl.go
@@ -108,8 +108,9 @@ func NewOnlineDDLStorage(newtctx *tcontext.Context, cfg *config.SubTaskConfig) *
 
 // Init initials online handler
 func (s *OnlineDDLStorage) Init() error {
-	s.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
-	db, dbConn, err := createConn(s.tctx, s.cfg, s.cfg.To)
+	onlineDB := s.cfg.To
+	onlineDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
+	db, dbConn, err := createConn(s.tctx, s.cfg, onlineDB)
 	if err != nil {
 		return terror.WithScope(err, terror.ScopeDownstream)
 	}
diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go
index 951672d86c..1f40fd7c65 100644
--- a/syncer/sharding_group.go
+++ b/syncer/sharding_group.go
@@ -459,8 +459,9 @@ func (k *ShardingGroupKeeper) Init(conn *DBConn) error {
 	if conn != nil {
 		k.dbConn = conn
 	} else {
-		k.cfg.To.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
-		db, dbConn, err := createConn(k.tctx, k.cfg, k.cfg.To)
+		sgkDB := k.cfg.To
+		sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
+		db, dbConn, err := createConn(k.tctx, k.cfg, sgkDB)
 		if err != nil {
 			return err
 		}

From a8342a10020e813a51a538928fea37bfbac6cf98 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 25 Sep 2019 12:15:55 +0800
Subject: [PATCH 17/50] use createConns instead of createConn

---
 loader/checkpoint.go      |  4 ++--
 loader/checkpoint_test.go |  3 ++-
 loader/db.go              | 13 -------------
 loader/loader.go          |  7 +++++--
 syncer/checkpoint.go      |  4 ++--
 syncer/db.go              | 16 ----------------
 syncer/online_ddl.go      |  4 ++--
 syncer/sharding_group.go  |  4 ++--
 syncer/syncer.go          |  7 ++++++-
 9 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/loader/checkpoint.go b/loader/checkpoint.go
index 0ada5b25c9..cc1a076e3b 100644
--- a/loader/checkpoint.go
+++ b/loader/checkpoint.go
@@ -77,7 +77,7 @@ type RemoteCheckPoint struct {
 }
 
 func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id string) (CheckPoint, error) {
-	db, dbConn, err := createConn(tctx.Context(), cfg)
+	db, dbConns, err := createConns(tctx, cfg, 1)
 	if err != nil {
 		return nil, err
 	}
@@ -86,7 +86,7 @@ func newRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s
 
 	cp := &RemoteCheckPoint{
 		db:             db,
-		conn:           dbConn,
+		conn:           dbConns[0],
 		id:             id,
 		restoringFiles: make(map[string]map[string]FilePosSet),
 		finishedTables: make(map[string]struct{}),
diff --git a/loader/checkpoint_test.go b/loader/checkpoint_test.go
index 65bf724617..d0aadb2564 100644
--- a/loader/checkpoint_test.go
+++ b/loader/checkpoint_test.go
@@ -113,8 +113,9 @@ func (t *testCheckPointSuite) TestForDB(c *C) {
 	c.Assert(count, Equals, len(cases))
 
 	// update checkpoints
-	db, conn, err := createConn(tctx.Context(), t.cfg)
+	db, conns, err := createConns(tctx, t.cfg, 1)
 	c.Assert(err, IsNil)
+	conn := conns[0]
 	defer func() {
 		conn.Close()
 		db.Close()
 	}()
diff --git a/loader/db.go b/loader/db.go
index d224762a6c..2d9897f40c 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -14,7 +14,6 @@
 package loader
 
 import (
-	"context"
 	"database/sql"
 	"strconv"
 	"strings"
@@ -158,18 +157,6 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 	return err
 }
 
-func createConn(ctx context.Context, cfg *config.SubTaskConfig) (*conn.BaseDB, *DBConn, error) {
-	baseDB, err := conn.DefaultDBProvider.Apply(cfg.To)
-	if err != nil {
-		return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
-	}
-	baseConn, err := baseDB.GetBaseConn(ctx)
-	if err != nil {
-		return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
-	}
-	return baseDB, &DBConn{baseConn: baseConn, cfg: cfg}, nil
-}
-
 func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) {
 	baseDB, err := conn.DefaultDBProvider.Apply(cfg.To)
 	if err != nil {
diff --git a/loader/loader.go b/loader/loader.go
index 429f17fdcd..285790a27c 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -975,11 +975,14 @@ func fetchMatchedLiteral(ctx *tcontext.Context, router *router.Table, schema, ta
 func (l *Loader) restoreData(ctx context.Context) error {
 	begin := time.Now()
 
-	baseDB, dbConn, err := createConn(ctx, l.cfg)
+	baseConn, err := l.toDB.GetBaseConn(ctx)
 	if err != nil {
 		return err
 	}
-	defer baseDB.Close()
+	dbConn := &DBConn{
+		baseConn: baseConn,
+		cfg:      l.cfg,
+	}
 
 	dispatchMap := make(map[string]*fileJob)
 
diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go
index 56fcfa2616..a29f52da97 100644
--- a/syncer/checkpoint.go
+++ b/syncer/checkpoint.go
@@ -235,12 +235,12 @@ func (cp *RemoteCheckPoint) Init(conn *DBConn) error {
 	} else {
 		checkPointDB := cp.cfg.To
 		checkPointDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
-		db, dbConn, err := createConn(cp.tctx, cp.cfg, checkPointDB)
+		db, dbConns, err := createConns(cp.tctx, cp.cfg, checkPointDB, 1)
 		if err != nil {
 			return err
 		}
 		cp.db = db
-		cp.dbConn = dbConn
+		cp.dbConn = dbConns[0]
 	}
 
 	return cp.prepare()
diff --git a/syncer/db.go b/syncer/db.go
index 0bbae1d96e..40545b1af9 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -179,7 +179,6 @@ func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...inter
 			return false
 		},
 	}
-
 	ret, _, err := conn.baseConn.ApplyRetryStrategy(
 		tctx,
 		params,
@@ -265,21 +264,6 @@ func (conn *DBConn) executeSQL(tctx *tcontext.Context, queries []string, args ..
 	return conn.executeSQLWithIgnore(tctx, nil, queries, args...)
 }
 
-func createConn(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*conn.BaseDB, *DBConn, error) {
-	baseDB, err := createBaseDB(dbCfg)
-	if err != nil {
-		return nil, nil, err
-	}
-	baseConn, err := baseDB.GetBaseConn(tctx.Context())
-	if err != nil {
-		return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
-	}
-	resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
-		return baseDB.GetBaseConn(tctx.Context())
-	}
-	return baseDB, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}, nil
-}
-
 func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) (*conn.BaseDB, []*DBConn, error) {
 	conns := make([]*DBConn, 0, count)
 	db, err := createBaseDB(dbCfg)
diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go
index 16c9b60e5a..a1273c9d56 100644
--- a/syncer/online_ddl.go
+++ b/syncer/online_ddl.go
@@ -110,12 +110,12 @@ func NewOnlineDDLStorage(newtctx *tcontext.Context, cfg *config.SubTaskConfig) *
 func (s *OnlineDDLStorage) Init() error {
 	onlineDB := s.cfg.To
 	onlineDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
-	db, dbConn, err := createConn(s.tctx, s.cfg, onlineDB)
+	db, dbConns, err := createConns(s.tctx, s.cfg, onlineDB, 1)
 	if err != nil {
 		return terror.WithScope(err, terror.ScopeDownstream)
 	}
 	s.db = db
-	s.dbConn = dbConn
+	s.dbConn = dbConns[0]
 
 	err = s.prepare()
 	if err != nil {
diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go
index 1f40fd7c65..b4c5d7d9e2 100644
--- a/syncer/sharding_group.go
+++ b/syncer/sharding_group.go
@@ -461,12 +461,12 @@ func (k *ShardingGroupKeeper) Init(conn *DBConn) error {
 	} else {
 		sgkDB := k.cfg.To
 		sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
-		db, dbConn, err := createConn(k.tctx, k.cfg, sgkDB)
+		db, dbConns, err := createConns(k.tctx, k.cfg, sgkDB, 1)
 		if err != nil {
 			return err
 		}
 		k.db = db
-		k.dbConn = dbConn
+		k.dbConn = dbConns[0]
 	}
 	err := k.prepare()
 	return err
diff --git a/syncer/syncer.go b/syncer/syncer.go
index f29929ef37..0ac217af82 100644
--- a/syncer/syncer.go
+++ b/syncer/syncer.go
@@ -2043,7 +2043,12 @@ func (s *Syncer) createDBs() error {
 	// baseConn for ddl
 	dbCfg = s.cfg.To
 	dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
-	s.ddlDB, s.ddlDBConn, err = createConn(s.tctx, s.cfg, dbCfg)
+
+	var ddlDBConns []*DBConn
+	s.ddlDB, ddlDBConns, err = createConns(s.tctx, s.cfg, dbCfg, 1)
+	if err == nil {
+		s.ddlDBConn = ddlDBConns[0]
+	}
 	if err != nil {
 		closeUpstreamConn(s.tctx, s.fromDB)
 		closeBaseDB(s.tctx, s.toDB)

From 25cbcdc6dabe6e20be3e9bbf8acc88725ee2d47b Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 25 Sep 2019 14:28:46 +0800
Subject: [PATCH 18/50] address comments

---
 syncer/checkpoint.go      | 22 +++++++++-------------
 syncer/checkpoint_test.go |  3 ++-
 syncer/syncer.go          |  2 +-
 syncer/syncer_test.go     | 11 +++++++++--
 4 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go
index a29f52da97..7449feabea 100644
--- a/syncer/checkpoint.go
+++ b/syncer/checkpoint.go
@@ -123,7 +123,7 @@ func (b *binlogPoint) String() string {
 // because, when restarting to continue the sync, all sharding DDLs must try-sync again
 type CheckPoint interface {
 	// Init initializes the CheckPoint
-	Init(conn *DBConn) error
+	Init() error
 
 	// Close closes the CheckPoint
 	Close()
@@ -229,19 +229,15 @@ func NewRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s
 }
 
 // Init implements CheckPoint.Init
-func (cp *RemoteCheckPoint) Init(conn *DBConn) error {
-	if conn != nil {
-		cp.dbConn = conn
-	} else {
-		checkPointDB := cp.cfg.To
-		checkPointDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
-		db, dbConns, err := createConns(cp.tctx, cp.cfg, checkPointDB, 1)
-		if err != nil {
-			return err
-		}
-		cp.db = db
-		cp.dbConn = dbConns[0]
+func (cp *RemoteCheckPoint) Init() error {
+	checkPointDB := cp.cfg.To
+	checkPointDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
+	db, dbConns, err := createConns(cp.tctx, cp.cfg, checkPointDB, 1)
+	if err != nil {
+		return err
 	}
+	cp.db = db
+	cp.dbConn = dbConns[0]
 
 	return cp.prepare()
 }
diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go
index f55b85c1ff..44ef5a6093 100644
--- a/syncer/checkpoint_test.go
+++ b/syncer/checkpoint_test.go
@@ -94,7 +94,8 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) {
 	c.Assert(err, IsNil)
 
 	conn := &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}
-	err = cp.Init(conn)
+	cp.(*RemoteCheckPoint).dbConn = conn
+	err = cp.(*RemoteCheckPoint).prepare()
 	c.Assert(err, IsNil)
 	cp.Clear()
diff --git a/syncer/syncer.go b/syncer/syncer.go
index 0ac217af82..99e013a92a 100644
--- a/syncer/syncer.go
+++ b/syncer/syncer.go
@@ -350,7 +350,7 @@ func (s *Syncer) Init() (err error) {
 	}
 
-	err = s.checkpoint.Init(nil)
+	err = s.checkpoint.Init()
 	if err != nil {
 		return err
 	}
diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go
index 0c0bd667a5..115873959f 100644
--- a/syncer/syncer_test.go
+++ b/syncer/syncer_test.go
@@ -1232,7 +1232,11 @@ func (s *testSyncerSuite) TestSharding(c *C) {
 		checkPointDBConn, err := checkPointDB.Conn(ctx)
 		c.Assert(err, IsNil)
 		syncer.initShardingGroups(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}})
-		syncer.checkpoint.Init(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}})
+
+		// mock syncer.checkpoint.Init() function
+		syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}
+		syncer.checkpoint.(*RemoteCheckPoint).prepare()
+
 		syncer.reset()
 		events := append(createEvents, s.generateEvents(_case.testEvents, c)...)
 		syncer.streamerProducer = &MockStreamProducer{events}
@@ -1390,7 +1394,10 @@ func (s *testSyncerSuite) TestRun(c *C) {
 	checkPointMock.ExpectExec(fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s_syncer_checkpoint`", s.cfg.MetaSchema, s.cfg.Name)).WillReturnResult(sqlmock.NewResult(1, 1))
 	checkPointMock.ExpectCommit()
 
-	syncer.checkpoint.Init(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}})
+	// mock syncer.checkpoint.Init() function
+	syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}
+	syncer.checkpoint.(*RemoteCheckPoint).prepare()
+
 	syncer.reset()
 	events1 := mockBinlogEvents{
 		mockBinlogEvent{typ: DBCreate, args: []interface{}{"test_1"}},

From f80c5bcc6acf9f2d512b795aafbba8c0801f0e05 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 25 Sep 2019 14:33:57 +0800
Subject: [PATCH 19/50] address comments

---
 syncer/db.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/syncer/db.go b/syncer/db.go
index 40545b1af9..fd9ff85a2f 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -179,6 +179,7 @@ func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...inter
 			return false
 		},
 	}
+
 	ret, _, err := conn.baseConn.ApplyRetryStrategy(
 		tctx,
 		params,
@@ -239,7 +240,6 @@ func (conn *DBConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError fun
 		func(ctx *tcontext.Context) (interface{}, error) {
 			startTime := time.Now()
 			ret, err := conn.baseConn.ExecuteSQLWithIgnoreError(ctx, ignoreError, queries, args...)
-
 			if err == nil {
 				cost := time.Since(startTime)
 				txnHistogram.WithLabelValues(conn.cfg.Name).Observe(cost.Seconds())

From 67cf1e29d34c8d9b7627a5fe58cbb69d1b684893 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 25 Sep 2019 14:56:29 +0800
Subject: [PATCH 20/50] address comments

---
 syncer/sharding_group.go | 23 +++++++++--------------
 syncer/syncer.go         | 22 +++++++++++++---------
 syncer/syncer_test.go    |  6 +++++-
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go
index b4c5d7d9e2..206e3b798b 100644
--- a/syncer/sharding_group.go
+++ b/syncer/sharding_group.go
@@ -454,22 +454,17 @@ func (k *ShardingGroupKeeper) AddGroup(targetSchema, targetTable string, sourceI
 }
 
 // Init does initialization staff
-func (k *ShardingGroupKeeper) Init(conn *DBConn) error {
+func (k *ShardingGroupKeeper) Init() error {
 	k.clear()
-	if conn != nil {
-		k.dbConn = conn
-	} else {
-		sgkDB := k.cfg.To
-		sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
-		db, dbConns, err := createConns(k.tctx, k.cfg, sgkDB, 1)
-		if err != nil {
-			return err
-		}
-		k.db = db
-		k.dbConn = dbConns[0]
+	sgkDB := k.cfg.To
+	sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
+	db, dbConns, err := createConns(k.tctx, k.cfg, sgkDB, 1)
+	if err != nil {
+		return err
 	}
-	err := k.prepare()
-	return err
+	k.db = db
+	k.dbConn = dbConns[0]
+	return k.prepare()
 }
 
 // clear clears all sharding groups
diff --git a/syncer/syncer.go b/syncer/syncer.go
index 99e013a92a..414ca22be7 100644
--- a/syncer/syncer.go
+++ b/syncer/syncer.go
@@ -343,7 +343,12 @@ func (s *Syncer) Init() (err error) {
 	}
 
 	if s.cfg.IsSharding {
-		err = s.initShardingGroups(nil)
+		err = s.sgk.Init()
+		if err != nil {
+			return err
+		}
+
+		err = s.initShardingGroups()
 		if err != nil {
 			return err
 		}
@@ -405,19 +410,13 @@ func (s *Syncer) Init() (err error) {
 
 // initShardingGroups initializes sharding groups according to source MySQL, filter rules and router rules
 // NOTE: now we don't support modify router rules after task has started
-func (s *Syncer) initShardingGroups(conn *DBConn) error {
+func (s *Syncer) initShardingGroups() error {
 	// fetch tables from source and filter them
 	sourceTables, err := s.fromDB.fetchAllDoTables(s.bwList)
 	if err != nil {
 		return err
 	}
 
-	// clear old sharding group and initials some needed data
-	err = s.sgk.Init(conn)
-	if err != nil {
-		return err
-	}
-
 	// convert according to router rules
 	// target-schema -> target-table -> source-IDs
 	mapper := make(map[string]map[string][]string, len(sourceTables))
@@ -2328,7 +2327,12 @@ func (s *Syncer) Update(cfg *config.SubTaskConfig) error {
 
 	if s.cfg.IsSharding {
 		// re-init sharding group
-		err = s.initShardingGroups(nil)
+		err = s.sgk.Init()
+		if err != nil {
+			return err
+		}
+
+		err = s.initShardingGroups()
 		if err != nil {
 			return err
 		}
diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go
index 115873959f..755afe99e5 100644
--- a/syncer/syncer_test.go
+++ b/syncer/syncer_test.go
@@ -1231,7 +1231,11 @@ func (s *testSyncerSuite) TestSharding(c *C) {
 		c.Assert(err, IsNil)
 		checkPointDBConn, err := checkPointDB.Conn(ctx)
 		c.Assert(err, IsNil)
-		syncer.initShardingGroups(&DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}})
+
+		// mock syncer.sgk.Init() function
+		syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}}
+		syncer.sgk.prepare()
+		syncer.initShardingGroups()
 
 		// mock syncer.checkpoint.Init() function
 		syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}}
 		syncer.checkpoint.(*RemoteCheckPoint).prepare()

From d3dd6c176474c95b5fd50466485d6a96450964ef Mon Sep 17 00:00:00 2001
From: luancheng
Date: Thu, 26 Sep 2019 13:37:02 +0800
Subject: [PATCH 21/50] address comments

---
 pkg/conn/basedb.go | 32 +++++++++++++++++++------------
 syncer/db.go       | 10 ++++------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index 60f2f97676..f44a22ffa5 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -66,27 +66,23 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
 
 // BaseDB wraps *sql.DB
 type BaseDB struct {
+	sync.Mutex
 	DB *sql.DB
 
-	lock *sync.Mutex
-
 	// hold all db connections generated from this BaseDB
-	conns []*BaseConn
+	conns map[*BaseConn]bool
 
 	Retry retry.Strategy
 }
 
 // NewBaseDB returns *BaseDB object
 func NewBaseDB(db *sql.DB) *BaseDB {
-	conns := make([]*BaseConn, 0)
-	lock := new(sync.Mutex)
-	return &BaseDB{db, lock, conns, &retry.FiniteRetryStrategy{}}
+	conns := make(map[*BaseConn]bool)
+	return &BaseDB{DB: db, conns: conns, Retry: &retry.FiniteRetryStrategy{}}
 }
 
 // GetBaseConn retrieves *BaseConn which has own retryStrategy
 func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
-	d.lock.Lock()
-	defer d.lock.Unlock()
 	conn, err := d.DB.Conn(ctx)
 	if err != nil {
 		return nil, terror.ErrDBDriverError.Delegate(err)
@@ -96,19 +92,29 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
 		return nil, terror.ErrDBDriverError.Delegate(err)
 	}
 	baseConn := newBaseConn(conn, d.Retry)
-	d.conns = append(d.conns, baseConn)
+	d.Lock()
+	defer d.Unlock()
+	d.conns[baseConn] = true
 	return baseConn, nil
 }
 
-// Close release db resource
+// CloseBaseConn releases a BaseConn resource from BaseDB, and closes the BaseConn
+func (d *BaseDB) CloseBaseConn(conn *BaseConn) error {
+	d.Lock()
+	defer d.Unlock()
+	delete(d.conns, conn)
+	return conn.Close()
+}
+
+// Close releases the baseDB resource
 func (d *BaseDB) Close() error {
 	if d == nil || d.DB == nil {
 		return nil
 	}
 	var err error
-	d.lock.Lock()
-	defer d.lock.Unlock()
-	for _, conn := range d.conns {
+	d.Lock()
+	defer d.Unlock()
+	for conn := range d.conns {
 		terr := conn.Close()
 		if err == nil {
 			err = terr
 		}
diff --git a/syncer/db.go b/syncer/db.go
index fd9ff85a2f..389ac23afd 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -150,12 +150,6 @@ func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
 	if err != nil {
 		return err
 	}
-	if conn.baseConn != nil {
-		err := conn.baseConn.Close()
-		if err != nil {
-			tctx.L().Warn("close connection error", log.ShortError(err))
-		}
-	}
 	conn.baseConn = dbConn
 	return nil
 }
@@ -277,6 +271,10 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config
 		return nil, nil, terror.WithScope(terror.ErrDBBadConn.Delegate(err), terror.ScopeDownstream)
 	}
 	resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
+		err := db.CloseBaseConn(dbConn)
+		if err != nil {
+			return nil, err
+		}
 		return db.GetBaseConn(tctx.Context())
 	}
 	conns = append(conns, &DBConn{baseConn: dbConn, cfg: cfg, resetConnFn: resetConnFn})

From 306aa190b40fcbe6319514963a7e09364e787530 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Thu, 26 Sep 2019 13:46:43 +0800
Subject: [PATCH 22/50] remove loader DBConn close

---
 loader/checkpoint_test.go | 1 -
 loader/db.go              | 9 +--------
 pkg/conn/baseconn.go      | 3 +--
 pkg/conn/basedb.go        | 6 +++---
 4 files changed, 5 insertions(+), 14 deletions(-)
diff --git a/loader/checkpoint_test.go b/loader/checkpoint_test.go
index d0aadb2564..42a5fac47f 100644
--- a/loader/checkpoint_test.go
+++ b/loader/checkpoint_test.go
@@ -117,7 +117,6 @@ func (t *testCheckPointSuite) TestForDB(c *C) {
 	c.Assert(err, IsNil)
 	conn := conns[0]
 	defer func() {
-		conn.Close()
 		db.Close()
 	}()
 	for _, cs := range cases {
diff --git a/loader/db.go b/loader/db.go
index 2d9897f40c..71a5375a62 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -112,6 +112,7 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 			return retry.IsRetryableError(err)
 		},
 	}
+
 	_, _, err := conn.baseConn.ApplyRetryStrategy(
 		ctx,
 		params,
@@ -149,14 +150,6 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 	return err
 }
 
-// Close release db connection resource, return it to BaseDB.db connection pool
-func (conn *DBConn) Close() error {
-	if conn == nil || conn.baseConn == nil {
-		return nil
-	}
-	return conn.baseConn.Close()
-}
-
 func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) {
 	baseDB, err := conn.DefaultDBProvider.Apply(cfg.To)
 	if err != nil {
diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go
index 4ba031390e..0905cc9e00 100644
--- a/pkg/conn/baseconn.go
+++ b/pkg/conn/baseconn.go
@@ -146,8 +146,7 @@ func (conn *BaseConn) ApplyRetryStrategy(tctx *tcontext.Context, params retry.Pa
 	return conn.RetryStrategy.Apply(tctx, params, operateFn)
 }
 
-// Close release DB resource
-func (conn *BaseConn) Close() error {
+func (conn *BaseConn) close() error {
 	if conn == nil || conn.DBConn == nil {
 		return nil
 	}
diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index f44a22ffa5..91c3beab42 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -66,9 +66,9 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
 
 // BaseDB wraps *sql.DB
 type BaseDB struct {
-	sync.Mutex
 	DB *sql.DB
 
+	sync.Mutex // protects following fields
 	// hold all db connections generated from this BaseDB
 	conns map[*BaseConn]bool
 
@@ -103,7 +103,7 @@ func (d *BaseDB) CloseBaseConn(conn *BaseConn) error {
 	d.Lock()
 	defer d.Unlock()
 	delete(d.conns, conn)
-	return conn.Close()
+	return conn.close()
 }
 
 // Close releases the baseDB resource
@@ -115,7 +115,7 @@ func (d *BaseDB) Close() error {
 	d.Lock()
 	defer d.Unlock()
 	for conn := range d.conns {
-		terr := conn.Close()
+		terr := conn.close()
 	}

From 553578e37a2f59ab0c2437a1364b679932ab7269 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Thu, 26 Sep 2019 15:12:08 +0800
Subject: [PATCH 23/50] address comment

---
 pkg/conn/basedb.go | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index 91c3beab42..15801ad716 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -68,16 +68,16 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
 type BaseDB struct {
 	DB *sql.DB
 
-	sync.Mutex // protects following fields
+	mu sync.Mutex // protects following fields
 	// hold all db connections generated from this BaseDB
-	conns map[*BaseConn]bool
+	conns map[*BaseConn]struct{}
 
 	Retry retry.Strategy
 }
 
 // NewBaseDB returns *BaseDB object
 func NewBaseDB(db *sql.DB) *BaseDB {
-	conns := make(map[*BaseConn]bool)
+	conns := make(map[*BaseConn]struct{})
 	return &BaseDB{DB: db, conns: conns, Retry: &retry.FiniteRetryStrategy{}}
 }
 
@@ -92,18 +92,21 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
 		return nil, terror.ErrDBDriverError.Delegate(err)
 	}
 	baseConn := newBaseConn(conn, d.Retry)
-	d.Lock()
-	defer d.Unlock()
-	d.conns[baseConn] = true
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	d.conns[baseConn] = struct{}{}
 	return baseConn, nil
 }
 
 // CloseBaseConn releases a BaseConn resource from BaseDB, and closes the BaseConn
 func (d *BaseDB) CloseBaseConn(conn *BaseConn) error {
-	d.Lock()
-	defer d.Unlock()
-	delete(d.conns, conn)
-	return conn.close()
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	if _, ok := d.conns[conn]; ok {
+		delete(d.conns, conn)
+		return conn.close()
+	}
+	return nil
 }
 
 // Close releases the baseDB resource
@@ -112,8 +115,8 @@ func (d *BaseDB) Close() error {
 		return nil
 	}
 	var err error
-	d.Lock()
-	defer d.Unlock()
+	d.mu.Lock()
+	defer d.mu.Unlock()
 	for conn := range d.conns {
 		terr := conn.close()
 		if err == nil {
 			err = terr
 		}

From fd606dee8bd6a4caf01d0d1fb189945da71c10b6 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Sun, 29 Sep 2019 18:55:36 +0800
Subject: [PATCH 24/50] address comments

---
 loader/loader.go      | 1 +
 syncer/db.go          | 7 +++----
 syncer/syncer.go      | 4 ++--
 syncer/syncer_test.go | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/loader/loader.go b/loader/loader.go
index 285790a27c..acee64a27a 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -979,6 +979,7 @@ func (l *Loader) restoreData(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
+	defer l.toDB.CloseBaseConn(baseConn)
 	dbConn := &DBConn{
 		baseConn: baseConn,
 		cfg:      l.cfg,
diff --git a/syncer/db.go b/syncer/db.go
index 389ac23afd..aff398b446 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -87,9 +87,8 @@ func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
 	}
 }
 
-// UpStreamConn connect to upstream DB, use *sql.DB instead of *sql.Conn
+// UpStreamConn connects to the upstream DB
 type UpStreamConn struct {
-	cfg    *config.SubTaskConfig
 	BaseDB *conn.BaseDB
 }
 
@@ -117,12 +116,12 @@ func (conn *UpStreamConn) countBinaryLogsSize(pos mysql.Position) (int64, error)
 	return countBinaryLogsSize(pos, conn.BaseDB.DB)
 }
 
-func createUpStreamConn(cfg *config.SubTaskConfig, dbCfg config.DBConfig) (*UpStreamConn, error) {
+func createUpStreamConn(dbCfg config.DBConfig) (*UpStreamConn, error) {
 	baseDB, err := createBaseDB(dbCfg)
 	if err != nil {
 		return nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeUpstream)
 	}
-	return &UpStreamConn{BaseDB: baseDB, cfg: cfg}, nil
+	return &UpStreamConn{BaseDB: baseDB}, nil
 }
 
 func closeUpstreamConn(tctx *tcontext.Context, conn *UpStreamConn) {
diff --git a/syncer/syncer.go b/syncer/syncer.go
index 414ca22be7..5c2153f954 100644
--- a/syncer/syncer.go
+++ b/syncer/syncer.go
@@ -2025,7 +2025,7 @@ func (s *Syncer) createDBs() error {
 	var err error
 	dbCfg := s.cfg.From
 	dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout)
-	s.fromDB, err = createUpStreamConn(s.cfg, dbCfg)
+	s.fromDB, err = createUpStreamConn(dbCfg)
 	if err != nil {
 		return err
 	}
@@ -2417,7 +2417,7 @@ func (s *Syncer) UpdateFromConfig(cfg *config.SubTaskConfig) error {
 	var err error
 	s.cfg.From.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout)
-	s.fromDB, err = createUpStreamConn(s.cfg, s.cfg.From)
+	s.fromDB, err = createUpStreamConn(s.cfg.From)
 	if err != nil {
 		s.tctx.L().Error("fail to create baseConn connection", log.ShortError(err))
 		return err
 	}
diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go
index 755afe99e5..2c613678be 100644
--- a/syncer/syncer_test.go
+++ b/syncer/syncer_test.go
@@ -1219,7 +1219,7 @@ func (s *testSyncerSuite) TestSharding(c *C) {
 		ctx := context.Background()
 		// fromDB mocks upstream dbConn, dbConn mocks downstream dbConn
-		syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: conn.NewBaseDB(fromDB)}
+		syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(fromDB)}
 		dbConn, err := db.Conn(ctx)
 		c.Assert(err, IsNil)
 		syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
@@ -1381,7 +1381,7 @@ func (s *testSyncerSuite) TestRun(c *C) {
 	s.cfg.DisableCausality = false
 	syncer := NewSyncer(s.cfg)
-	syncer.fromDB = &UpStreamConn{cfg: s.cfg, BaseDB: conn.NewBaseDB(db)}
+	syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)}
 	syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}},
 		{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}}
 	syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}

From a835b47b7a31754415e781ca609e035a6a07b60f Mon Sep 17 00:00:00 2001
From: luancheng
Date: Sun, 29 Sep 2019 19:07:18 +0800
Subject: [PATCH 25/50] address comment

---
 syncer/db.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/syncer/db.go b/syncer/db.go
index aff398b446..1f03c76ccf 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -88,6 +88,9 @@ func closeBaseDB(tctx *tcontext.Context, baseDB *conn.BaseDB) {
 }
 
 // UpStreamConn connects to the upstream DB
+// Normally, we need to get some upstream information through some helper functions;
+// these helper functions are all simple queries, so we use a pool of connections here.
+// This may change to a single connection some day.
 type UpStreamConn struct {
 	BaseDB *conn.BaseDB
 }

From 62e64b2940ef7cdb975011944f3d364486476a6e Mon Sep 17 00:00:00 2001
From: 3pointer
Date: Tue, 8 Oct 2019 10:09:22 +0800
Subject: [PATCH 26/50] Update loader/db.go

Co-Authored-By: amyangfei
---
 loader/db.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loader/db.go b/loader/db.go
index 71a5375a62..654a83dd0c 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -163,7 +163,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount
 			if terr != nil {
 				tctx.L().Error("failed to close baseDB", zap.Error(terr))
 			}
-			return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
+			return nil, nil, terror.WithScope(err, terror.ScopeDownstream)
 		}
 		conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg})
 	}

From 7d0a41c11d6ee00ce60858bd650a9c47f6e6025b Mon Sep 17 00:00:00 2001
From: 3pointer
Date: Tue, 8 Oct 2019 10:09:43 +0800
Subject: [PATCH 27/50] Update pkg/conn/basedb.go

Co-Authored-By: amyangfei
---
 pkg/conn/basedb.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index 15801ad716..035ddc5f28 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -57,7 +57,7 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) {
 	db, err := sql.Open("mysql", dsn)
 	if err != nil {
-		return nil, terror.ErrDBDriverError.Delegate(err)
+		return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError)
 	}
 	db.SetMaxIdleConns(maxIdleConns)

From 7816f31067f53ff507c49c21406859340b05599a Mon Sep 17 00:00:00 2001
From: 3pointer
Date: Tue, 8 Oct 2019 10:10:04 +0800
Subject: [PATCH 28/50] Update pkg/conn/basedb.go

Co-Authored-By: amyangfei
---
 pkg/conn/basedb.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index 035ddc5f28..e498be1cd8 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -85,7 +85,7 @@ func NewBaseDB(db *sql.DB) *BaseDB {
 func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
 	conn, err := d.DB.Conn(ctx)
 	if err != nil {
-		return nil, terror.ErrDBDriverError.Delegate(err)
+		return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError)
 	}
 	err = conn.PingContext(ctx)
 	if err != nil {

From fd6caa4e24bca0050aa873fc92fa01a5ade13e91 Mon Sep 17 00:00:00 2001
From: 3pointer
Date: Tue, 8 Oct 2019 10:10:25 +0800
Subject: [PATCH 29/50] Update pkg/conn/basedb.go

Co-Authored-By: amyangfei
---
 pkg/conn/basedb.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index e498be1cd8..167af6c55c 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -89,7 +89,7 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
 	}
 	err = conn.PingContext(ctx)
 	if err != nil {
-		return nil, terror.ErrDBDriverError.Delegate(err)
+		return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError)
 	}
 	baseConn := newBaseConn(conn, d.Retry)
 	d.mu.Lock()

From 2446fb4c272db24dc8427a1a0d4f24ec9a7f12d5 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Tue, 8 Oct 2019 11:02:06 +0800
Subject: [PATCH 30/50] loader unit: add resetConn function

---
 loader/checkpoint.go | 8 ++++++++
 loader/db.go         | 34 ++++++++++++++++++++++++++++--
 loader/loader.go     | 28 ++++++++++++++++++++++++++++
 syncer/db.go         | 7 ++-----
 4 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/loader/checkpoint.go b/loader/checkpoint.go
index cc1a076e3b..dc5a0371d1 100644
--- a/loader/checkpoint.go
+++ b/loader/checkpoint.go
@@ -50,6 +50,9 @@ type CheckPoint interface {
 	// Init initialize checkpoint data in tidb
 	Init(filename string, endpos int64) error
 
+	// ResetConn resets database connections owned by the Checkpoint
+	ResetConn() error
+
 	// Close closes the CheckPoint
 	Close()
 
@@ -296,6 +299,11 @@ func (cp *RemoteCheckPoint) Init(filename string, endPos int64) error {
 	return nil
 }
 
+// ResetConn implements CheckPoint.ResetConn
+func (cp *RemoteCheckPoint) ResetConn() error {
+	return cp.conn.resetConn(cp.tctx)
+}
+
 // Close implements CheckPoint.Close
 func (cp *RemoteCheckPoint) Close() {
 	err := cp.db.Close()
diff --git a/loader/db.go b/loader/db.go
index 654a83dd0c..b51f704a55 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -39,6 +39,8 @@ type DBConn struct {
 	cfg      *config.SubTaskConfig
 	baseConn *conn.BaseConn
+
+	resetConnFn func(*tcontext.Context) (*conn.BaseConn, error)
 }
 
 func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) {
@@ -104,6 +106,17 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 		FirstRetryDuration: 2 * time.Second,
 		BackoffStrategy:    retry.LinearIncrease,
 		IsRetryableFn: func(retryTime int, err error) bool {
+			if retry.IsConnectionError(err) {
+				err = conn.resetConn(ctx)
+				if err != nil {
+					ctx.L().Error("reset connection failed", zap.Int("retry", retryTime),
+						zap.String("queries", utils.TruncateInterface(queries, -1)),
+						zap.String("arguments", utils.TruncateInterface(args, -1)),
+						log.ShortError(err))
+					return false
+				}
+				return true
+			}
 			ctx.L().Warn("execute statements", zap.Int("retry", retryTime),
 				zap.String("queries", utils.TruncateInterface(queries, -1)),
 				zap.String("arguments", utils.TruncateInterface(args, -1)),
				log.ShortError(err))
			tidbExecutionErrorCounter.WithLabelValues(conn.cfg.Name).Inc()
			return retry.IsRetryableError(err)
		},
	}
@@ -147,10 +160,20 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 	return err
 }
 
+// resetConn resets one worker connection from the specified *BaseDB
+func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
+	dbConn, err := conn.resetConnFn(tctx)
+	if err != nil {
+		return err
+	}
+	conn.baseConn = dbConn
+	return nil
+}
+
 func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) {
 	baseDB, err := conn.DefaultDBProvider.Apply(cfg.To)
 	if err != nil {
-		return nil, nil, terror.WithScope(terror.DBErrorAdapt(err, terror.ErrDBDriverError), terror.ScopeDownstream)
+		return nil, nil, terror.WithScope(err, terror.ScopeDownstream)
 	}
 	conns := make([]*DBConn, 0, workerCount)
 	for i := 0; i < workerCount; i++ {
@@ -165,7 +188,14 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount
 			}
 			return nil, nil, terror.WithScope(err, terror.ScopeDownstream)
 		}
-		conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg})
+		resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
+			err := baseDB.CloseBaseConn(baseConn)
+			if err != nil {
+				return nil, err
+			}
+			return baseDB.GetBaseConn(tctx.Context())
+		}
+		conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn})
 	}
 	return baseDB, conns, nil
 }
diff --git a/loader/loader.go b/loader/loader.go
index acee64a27a..6e2c5e87a9 100644
--- a/loader/loader.go
+++ b/loader/loader.go
@@ -600,10 +600,38 @@ func (l *Loader) Resume(ctx context.Context, pr chan pb.ProcessResult) {
 		return
 	}
 
+	err := l.resetDBs()
+	if err != nil {
+		pr <- pb.ProcessResult{
+			IsCanceled: false,
+			Errors: []*pb.ProcessError{
+				unit.NewProcessError(pb.ErrorType_UnknownError, errors.ErrorStack(err)),
+			},
+		}
+		return
+	}
 	// continue the processing
 	l.Process(ctx, pr)
 }
 
+func (l *Loader) resetDBs() error {
+	var err error
+
+	for i := 0; i < len(l.toDBConns); i++ {
+		err = l.toDBConns[i].resetConn(l.tctx)
+		if err != nil {
+			return terror.WithScope(err, terror.ScopeDownstream)
+		}
+	}
+
+	err = l.checkPoint.ResetConn()
+	if err != nil {
+		return terror.WithScope(err, terror.ScopeDownstream)
+	}
+
+	return nil
+}
+
 // Update implements Unit.Update
 // now, only support to update config for routes, filters, column-mappings, black-white-list
 // now no config diff implemented, so simply re-init use new config
diff --git a/syncer/db.go b/syncer/db.go
index 1f03c76ccf..cafdcd7649 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -143,11 +143,8 @@ type DBConn struct {
 	resetConnFn func(*tcontext.Context) (*conn.BaseConn, error)
 }
 
-// ResetConn reset one worker connection from specify *BaseDB
+// resetConn resets one worker connection from the specified *BaseDB
 func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
-	if conn == nil {
-		return terror.ErrDBDriverError.Generate("database not valid")
-	}
 	dbConn, err := conn.resetConnFn(tctx)
 	if err != nil {
 		return err
 	}

From 457b8562a2cea02345f7461d65f9576feb4d3896 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Tue, 8 Oct 2019 11:13:04 +0800
Subject: [PATCH 31/50] address comment

---
 syncer/sharding_group.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go
index 206e3b798b..a95b43ef85 100644
--- a/syncer/sharding_group.go
+++ b/syncer/sharding_group.go
@@ -457,7 +457,7 @@ func (k *ShardingGroupKeeper) AddGroup(targetSchema, targetTable string, sourceI
 func (k *ShardingGroupKeeper) Init() error {
 	k.clear()
 	sgkDB := k.cfg.To
-	sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout)
+	sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout)
 	db, dbConns, err := createConns(k.tctx, k.cfg, sgkDB, 1)
 	if err != nil {
 		return err

From dec0aebdff83e9b7152fcea054913f81e2f19e39 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Tue, 8 Oct 2019 11:47:16 +0800
Subject: [PATCH 32/50] add comments for BaseConn

---
 pkg/conn/baseconn.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go
index 0905cc9e00..f835df4b7a 100644
--- a/pkg/conn/baseconn.go
+++ b/pkg/conn/baseconn.go
@@ -26,6 +26,31 @@ import (
 )
 
 // BaseConn wraps a connection to DB
+// For the Syncer and Loader units, the number of connections differs according to the config
+// Currently these types of connections exist:
+// Syncer:
+//   Worker Connection:
+//     DML conenction:
+//       execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections
+//     DDL Connection:
+//       execute some DDL on Downstream DB, one unit has one connection
+//   CheckPoint Connection:
+//     interact with CheckPoint DB, one unit has one connection
+//   OnlineDDL connection:
+//     interact with Online DDL DB, one unit has one connection
+//   ShardGroupKeeper connection:
+//     interact with ShardGroupKeeper DB, one unit has one connection
+//
+// Loader:
+//   Worker Connection:
+//     execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections
+//   CheckPoint Connection:
+//     interact with CheckPoint DB, one unit has one connection
+//   restore Connection:
+//     only use to create schema and table in restoreData, ignore already exists error, one unit has one connection
+//
+// each connection should have the ability to retry on some common errors (e.g. tmysql.ErrTiKVServerTimeout) or maybe some specific errors in the future
+// and each connection should also have the ability to reset itself on some specific connection errors (e.g. driver.ErrBadConn)
 type BaseConn struct {
 	DBConn *sql.Conn

From 5953333db77058290a77396510f023603084574c Mon Sep 17 00:00:00 2001
From: luancheng
Date: Tue, 8 Oct 2019 13:02:49 +0800
Subject: [PATCH 33/50] update comments

---
 pkg/conn/baseconn.go | 3 ++-
 syncer/db.go         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go
index f835df4b7a..519ce6c957 100644
--- a/pkg/conn/baseconn.go
+++ b/pkg/conn/baseconn.go
@@ -47,7 +47,8 @@ import (
 //   CheckPoint Connection:
 //     interact with CheckPoint DB, one unit has one connection
 //   restore Connection:
-//     only use to create schema and table in restoreData, ignore already exists error, one unit has one connection
+//     only used to create schemas and tables in restoreData;
+//     it ignores already-exists errors and should be removed after use, one unit has one connection
 //
 // each connection should have the ability to retry on some common errors (e.g. tmysql.ErrTiKVServerTimeout) or maybe some specific errors in the future
 // and each connection should also have the ability to reset itself on some specific connection errors (e.g. driver.ErrBadConn)
diff --git a/syncer/db.go b/syncer/db.go
index cafdcd7649..f15ab8c324 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -72,7 +72,7 @@ type binlogSize struct {
 func createBaseDB(dbCfg config.DBConfig) (*conn.BaseDB, error) {
 	db, err := conn.DefaultDBProvider.Apply(dbCfg)
 	if err != nil {
-		return nil, err
+		return nil, terror.WithScope(err, terror.ScopeDownstream)
 	}
 	return db, nil
 }

From 21d85231a5f9cfdf52ea8cb0ab0f7d6e80adafdd Mon Sep 17 00:00:00 2001
From: luancheng
Date: Wed, 9 Oct 2019 18:34:10 +0800
Subject: [PATCH 34/50] address comments

---
 loader/db.go         | 21 ++++++++++++---------
 pkg/conn/baseconn.go |  2 +-
 syncer/db.go         | 12 ++++++------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/loader/db.go b/loader/db.go
index b51f704a55..2510f8e9c3 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -40,7 +40,7 @@ type DBConn struct {
 	cfg      *config.SubTaskConfig
 	baseConn *conn.BaseConn
 
-	resetConnFn func(*tcontext.Context) (*conn.BaseConn, error)
+	resetConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error)
 }
 
 func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) {
@@ -117,12 +117,15 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 				}
 				return true
 			}
-			ctx.L().Warn("execute statements", zap.Int("retry", retryTime),
-				zap.String("queries", utils.TruncateInterface(queries, -1)),
-				zap.String("arguments", utils.TruncateInterface(args, -1)),
-				log.ShortError(err))
-			tidbExecutionErrorCounter.WithLabelValues(conn.cfg.Name).Inc()
-			return retry.IsRetryableError(err)
+			if retry.IsRetryableError(err) {
+				ctx.L().Warn("execute statements", zap.Int("retry", retryTime),
+					zap.String("queries", utils.TruncateInterface(queries, -1)),
+					zap.String("arguments", utils.TruncateInterface(args, -1)),
+					log.ShortError(err))
+				tidbExecutionErrorCounter.WithLabelValues(conn.cfg.Name).Inc()
+				return true
+			}
+			return false
 		},
 	}
 
// resetConn resets one worker connection from the specified *BaseDB
 func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
-	dbConn, err := conn.resetConnFn(tctx)
+	dbConn, err := conn.resetConnFn(tctx, conn.baseConn)
 	if err != nil {
 		return err
 	}
@@ -188,7 +191,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount
 			}
 			return nil, nil, terror.WithScope(err, terror.ScopeDownstream)
 		}
-		resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
+		resetConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) {
 			err := baseDB.CloseBaseConn(baseConn)
 			if err != nil {
 				return nil, err
diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go
index 519ce6c957..6dfbd9d088 100644
--- a/pkg/conn/baseconn.go
+++ b/pkg/conn/baseconn.go
@@ -30,7 +30,7 @@ import (
 // Currently these types of connections exist:
 // Syncer:
 //   Worker Connection:
-//     DML conenction:
+//     DML connection:
 //       execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections
 //     DDL Connection:
 //       execute some DDL on Downstream DB, one unit has one connection
diff --git a/syncer/db.go b/syncer/db.go
index f15ab8c324..ad9b12a2c5 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -140,12 +140,12 @@ type DBConn struct {
 	baseConn *conn.BaseConn
 
 	// for workerConn to reset baseConn
-	resetConnFn func(*tcontext.Context) (*conn.BaseConn, error)
+	resetConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error)
 }
 
 // resetConn resets one worker connection from the specified *BaseDB
 func (conn *DBConn) resetConn(tctx *tcontext.Context) error {
-	dbConn, err := conn.resetConnFn(tctx)
+	dbConn, err := conn.resetConnFn(tctx, conn.baseConn)
 	if err != nil {
 		return err
 	}
@@ -264,19 +264,19 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config
 	for i := 0; i < count; i++ {
-		dbConn, err := db.GetBaseConn(tctx.Context())
+		baseConn, err := db.GetBaseConn(tctx.Context())
 		if err != nil {
 			closeBaseDB(tctx, db)
 			return nil, nil, terror.WithScope(err, terror.ScopeDownstream)
 		}
-		resetConnFn := func(tctx *tcontext.Context) (*conn.BaseConn, error) {
-			err := db.CloseBaseConn(dbConn)
+		resetConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) {
+			err := db.CloseBaseConn(baseConn)
 			if err != nil {
 				return nil, err
 			}
 			return db.GetBaseConn(tctx.Context())
 		}
-		conns = append(conns, &DBConn{baseConn: dbConn, cfg: cfg, resetConnFn: resetConnFn})
+		conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn})
 	}
 	return db, conns, nil
 }

From 17c05949bed564d56884962aeaf3bea049f37148 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Thu, 10 Oct 2019 14:39:55 +0800
Subject: [PATCH 35/50] address comments

---
 loader/checkpoint_test.go |  3 ++-
 loader/db.go              |  2 +-
 pkg/conn/basedb.go        |  7 ++-----
 syncer/db.go              |  1 +
 tidb-slow.log             | 26 ++++++++++++++++++++++++++
 5 files changed, 32 insertions(+), 7 deletions(-)
 create mode 100644 tidb-slow.log

diff --git a/loader/checkpoint_test.go b/loader/checkpoint_test.go
index 42a5fac47f..e8ca933188 100644
--- a/loader/checkpoint_test.go
+++ b/loader/checkpoint_test.go
@@ -117,7 +117,8 @@ func (t *testCheckPointSuite) TestForDB(c *C) {
 	c.Assert(err, IsNil)
 	conn := conns[0]
 	defer func() {
-		db.Close()
+		err := db.Close()
+		c.Assert(err, IsNil)
 	}()
 	for _, cs := range cases {
 		sql2 := cp.GenSQL(cs.filename, cs.endPos)
diff --git a/loader/db.go b/loader/db.go
index 2510f8e9c3..15c45a24a7 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -106,6 +106,7 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 		FirstRetryDuration: 2 * time.Second,
 		BackoffStrategy:    retry.LinearIncrease,
 		IsRetryableFn: func(retryTime int, err error) bool {
+			tidbExecutionErrorCounter.WithLabelValues(conn.cfg.Name).Inc()
 			if retry.IsConnectionError(err) {
 				err = conn.resetConn(ctx)
 				if err != nil {
@@ -122,7 +123,6 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
 					zap.String("queries", utils.TruncateInterface(queries, -1)),
 					zap.String("arguments", utils.TruncateInterface(args, -1)),
 					log.ShortError(err))
-				tidbExecutionErrorCounter.WithLabelValues(conn.cfg.Name).Inc()
 				return true
 			}
 			return false
diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go
index 167af6c55c..1dcb049339 100644
--- a/pkg/conn/basedb.go
+++ b/pkg/conn/basedb.go
@@ -102,11 +102,8 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) {
 func (d *BaseDB) CloseBaseConn(conn *BaseConn) error {
 	d.mu.Lock()
 	defer d.mu.Unlock()
-	if _, ok := d.conns[conn]; ok {
-		delete(d.conns, conn)
-		return conn.close()
-	}
-	return nil
+	delete(d.conns, conn)
+	return conn.close()
 }
 
diff --git a/syncer/db.go b/syncer/db.go
index ad9b12a2c5..09620a6448 100644
--- a/syncer/db.go
+++ b/syncer/db.go
@@ -214,6 +214,7 @@ func (conn *DBConn) executeSQLWithIgnore(tctx *tcontext.Context, ignoreError fun
 					log.ShortError(err))
 				return false
 			}
+			sqlRetriesTotal.WithLabelValues("stmt_exec", conn.cfg.Name).Add(1)
 			return true
 		}
 		if retry.IsRetryableError(err) {
diff --git a/tidb-slow.log b/tidb-slow.log
new file mode 100644
index 0000000000..f16932a388
--- /dev/null
+++ b/tidb-slow.log
@@ -0,0 +1,26 @@
+# Time: 2019-10-10T01:23:34.437262+08:00
+# Txn_start_ts: 411734327725654016
+# Query_time: 0.31415633
+# Request_count: 2
+# Index_ids: [1]
+# Is_internal: true
+# Digest: 7d757d38b465483f07a597132f3a973125e78d6db2f42a0d1d8c7c0ec575f500
+# Stats: GLOBAL_VARIABLES:pseudo
+# Num_cop_tasks: 2
+# Cop_proc_avg: 0 Cop_proc_p90: 0 Cop_proc_max: 0 Cop_proc_addr: store1
+# Cop_wait_avg: 0 Cop_wait_p90: 0 Cop_wait_max: 0 Cop_wait_addr: store1
+# Mem_max: 9248
+select variable_name, variable_value from mysql.global_variables where variable_name in ('tidb_auto_analyze_ratio', 'tidb_auto_analyze_start_time', 'tidb_auto_analyze_end_time');
+# Time: 2019-10-10T01:23:34.43876+08:00
+# Txn_start_ts: 411734327735615488
+# Query_time: 0.301539165
+# Request_count: 1
+# Index_ids: [2]
+# Is_internal: true
+# Digest: 28a4a8e02337290d2dbc6e123f8b3abf1aabd02f3dedbaf4fd0783ae118f7f8a
+# Stats: bind_info:pseudo
+# Num_cop_tasks: 1
+# Cop_proc_avg: 0 Cop_proc_p90: 0 Cop_proc_max: 0 Cop_proc_addr: store1
+# Cop_wait_avg: 0 Cop_wait_p90: 0 Cop_wait_max: 0 Cop_wait_addr: store1
+# Mem_max: 124
+select original_sql, bind_sql, default_db, status, create_time, update_time, charset, collation from mysql.bind_info where update_time >= "0000-00-00 00:00:00";

From 7e6abba349e0e5566a5fd160529abf567cef1868 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Fri, 11 Oct 2019 10:55:52 +0800
Subject: [PATCH 36/50] address comment

---
 .gitignore    |  1 +
 tidb-slow.log | 26 --------------------------
 2 files changed, 1 insertion(+), 26 deletions(-)
 delete mode 100644 tidb-slow.log

diff --git a/.gitignore b/.gitignore
index d61d0044f4..8549cc6115 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ dmctl.log
 relay_log/*
 vendor
 */*.DS_Store
+tidb-slow.log
diff --git a/tidb-slow.log b/tidb-slow.log
deleted file mode 100644
index f16932a388..0000000000
--- a/tidb-slow.log
+++ /dev/null
@@ -1,26 +0,0 @@
-# Time: 2019-10-10T01:23:34.437262+08:00
-# Txn_start_ts: 411734327725654016
-# Query_time: 0.31415633
-# Request_count: 2
-# Index_ids: [1]
-# Is_internal: true
-# Digest: 7d757d38b465483f07a597132f3a973125e78d6db2f42a0d1d8c7c0ec575f500
-# Stats: GLOBAL_VARIABLES:pseudo
-# Num_cop_tasks: 2
-# Cop_proc_avg: 0 Cop_proc_p90: 0 Cop_proc_max: 0 Cop_proc_addr: store1
-# Cop_wait_avg: 0 Cop_wait_p90: 0 Cop_wait_max: 0 Cop_wait_addr: store1
-# Mem_max: 9248
-select variable_name, variable_value from mysql.global_variables where variable_name in ('tidb_auto_analyze_ratio', 'tidb_auto_analyze_start_time', 'tidb_auto_analyze_end_time');
-# Time: 2019-10-10T01:23:34.43876+08:00
-# Txn_start_ts: 411734327735615488
-# Query_time: 0.301539165
-# Request_count: 1
-# Index_ids: [2]
-# Is_internal: true
-# Digest: 28a4a8e02337290d2dbc6e123f8b3abf1aabd02f3dedbaf4fd0783ae118f7f8a
-# Stats: bind_info:pseudo
-# Num_cop_tasks: 1
-# Cop_proc_avg: 0 Cop_proc_p90: 0 Cop_proc_max: 0 Cop_proc_addr: store1
-# Cop_wait_avg: 0 Cop_wait_p90: 0 Cop_wait_max: 0 Cop_wait_addr: store1
-# Mem_max: 124
-select original_sql, bind_sql, default_db, status, create_time, update_time, charset, collation from mysql.bind_info where update_time >= "0000-00-00 00:00:00";

From a7f91d8cedf649ad6848a7b2d91d73107043a6f1 Mon Sep 17 00:00:00 2001
From: luancheng
Date: Fri, 11 Oct 2019 11:31:49 +0800
Subject: [PATCH 37/50] refine reset in baseConn

---
 loader/db.go              | 18 ++-----------
 pkg/conn/baseconn.go      | 53 ++++++++++++++++++++++++++++-----------
 pkg/conn/baseconn_test.go |  4 +--
 pkg/conn/basedb.go        |  6 ++---
 syncer/checkpoint_test.go |  2 +-
 syncer/db.go              | 19 ++------------
 syncer/syncer_test.go     | 18 ++++++-------
 7 files changed, 58 insertions(+), 62 deletions(-)

diff --git a/loader/db.go b/loader/db.go
index 15c45a24a7..b0101d4bc2 100644
--- a/loader/db.go
+++ b/loader/db.go
@@ -39,8 +39,6 @@
 type DBConn struct {
 	cfg      *config.SubTaskConfig
 	baseConn *conn.BaseConn
-
-	resetConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error)
 }
 
 func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) {
@@ -168,12 +166,7 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ...
// resetConn resets one worker connection from the specified *BaseDB func (conn *DBConn) resetConn(tctx *tcontext.Context) error { - dbConn, err := conn.resetConnFn(tctx, conn.baseConn) - if err != nil { - return err - } - conn.baseConn = dbConn - return nil + return conn.baseConn.Reset(tctx) } func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) { @@ -191,14 +184,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount } return nil, nil, terror.WithScope(err, terror.ScopeDownstream) } - resetConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) { - err := baseDB.CloseBaseConn(baseConn) - if err != nil { - return nil, err - } - return baseDB.GetBaseConn(tctx.Context()) - } - conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}) + conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg}) } return baseDB, conns, nil } diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index 6dfbd9d088..4a1072f692 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -25,28 +25,36 @@ import ( "go.uber.org/zap" ) -// BaseConn wraps a connection to DB -// For Syncer and Loader Unit, they both have different amount of connections due to config -// Currently we have some types of connections exist -// Syncer: -// Worker Connection: +// BaseConn is the basic connection we use in dm +// BaseDB -> BaseConn corresponds to sql.DB -> sql.Conn +// In our scenario, there are two main reasons why we need BaseConn +// 1. we often need one fixed DB connection to execute SQL +// 2. we need our own retry policy for failed executions +// So we split a fixed sql.Conn out of sql.DB, and wrap it as BaseConn +// Similar to sql.Conn, every BaseConn generated from one BaseDB shares this BaseDB to reset +// +// Basic usage: +// For Syncer and Loader Unit, they both have different amount of connections due to config +// Currently we have some types of connections exist +// Syncer: +// Worker Connection: // DML connection: // execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections // DDL Connection: // execute some DDL on Downstream DB, one unit has one connection -// CheckPoint Connection: +// CheckPoint Connection: // interact with CheckPoint DB, one unit has one connection -// OnlineDDL connection: +// OnlineDDL connection: // interact with Online DDL DB, one unit has one connection -// ShardGroupKeeper connection: +// ShardGroupKeeper connection: // interact with ShardGroupKeeper DB, one unit has one connection // -// Loader: -// Worker Connection: +// Loader: +// Worker Connection: // execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections -// CheckPoint Connection: +// CheckPoint Connection: // interact with CheckPoint DB, one unit has one connection -// restore Connection: +// Restore Connection: // only use to create schema and table in restoreData, // it ignore already exists error and it should be removed after use, one unit has one connection // @@ -56,14 +64,31 @@ type BaseConn struct { DBConn *sql.Conn RetryStrategy retry.Strategy + + // for reset + ParentDB *BaseDB } // newBaseConn builds BaseConn to connect real DB -func newBaseConn(conn *sql.Conn, strategy retry.Strategy) *BaseConn { +func newBaseConn(conn *sql.Conn, strategy retry.Strategy, db *BaseDB) *BaseConn { if strategy == nil { strategy = &retry.FiniteRetryStrategy{} } - return &BaseConn{conn, strategy} + return &BaseConn{conn, 
strategy, db} +} + +// Reset resets this BaseConn, close old one and generate new *sql.Conn from its BaseDB +func (conn *BaseConn) Reset(tctx *tcontext.Context) error { + err := conn.ParentDB.CloseBaseConn(conn) + if err != nil { + return err + } + baseConn, err := conn.ParentDB.GetBaseConn(tctx.Context()) + if err != nil { + return err + } + conn.DBConn = baseConn.DBConn + return nil } // SetRetryStrategy set retry strategy for baseConn diff --git a/pkg/conn/baseconn_test.go b/pkg/conn/baseconn_test.go index 4da2347c02..1084a56ef5 100644 --- a/pkg/conn/baseconn_test.go +++ b/pkg/conn/baseconn_test.go @@ -35,7 +35,7 @@ type testBaseConnSuite struct { } func (t *testBaseConnSuite) TestBaseConn(c *C) { - baseConn := newBaseConn(nil, nil) + baseConn := newBaseConn(nil, nil, nil) tctx := tcontext.Background() err := baseConn.SetRetryStrategy(nil) @@ -52,7 +52,7 @@ func (t *testBaseConnSuite) TestBaseConn(c *C) { dbConn, err := db.Conn(tctx.Context()) c.Assert(err, IsNil) - baseConn = &BaseConn{dbConn, nil} + baseConn = &BaseConn{dbConn, nil, nil} err = baseConn.SetRetryStrategy(&retry.FiniteRetryStrategy{}) c.Assert(err, IsNil) diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go index 1dcb049339..7257e28d1a 100644 --- a/pkg/conn/basedb.go +++ b/pkg/conn/basedb.go @@ -64,7 +64,7 @@ func (d *defaultDBProvider) Apply(config config.DBConfig) (*BaseDB, error) { return NewBaseDB(db), nil } -// BaseDB wraps *sql.DB +// BaseDB wraps *sql.DB, control the BaseConn type BaseDB struct { DB *sql.DB @@ -91,7 +91,7 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) { if err != nil { return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError) } - baseConn := newBaseConn(conn, d.Retry) + baseConn := newBaseConn(conn, d.Retry, d) d.mu.Lock() defer d.mu.Unlock() d.conns[baseConn] = struct{}{} @@ -106,7 +106,7 @@ func (d *BaseDB) CloseBaseConn(conn *BaseConn) error { return conn.close() } -// Close release baseDB resource +// Close release ParentDB resource func (d *BaseDB) Close() error { if d == nil || d.DB == nil { return nil diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go index 44ef5a6093..9145483dbf 100644 --- a/syncer/checkpoint_test.go +++ b/syncer/checkpoint_test.go @@ -92,7 +92,7 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) { dbConn, err := db.Conn(tcontext.Background().Context()) c.Assert(err, IsNil) - conn := &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + conn := &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} cp.(*RemoteCheckPoint).dbConn = conn err = cp.(*RemoteCheckPoint).prepare() diff --git a/syncer/db.go b/syncer/db.go index 09620a6448..a506b6330a 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -138,19 +138,11 @@ type DBConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn - - // for workerConn to reset baseConn - resetConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error) } // resetConn resets one worker connection from the specified *BaseDB func (conn *DBConn) resetConn(tctx *tcontext.Context) error { - dbConn, err := conn.resetConnFn(tctx, conn.baseConn) - if err != nil { - return err - } - conn.baseConn = dbConn - return nil + return conn.baseConn.Reset(tctx) } func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { @@ -270,14 +262,7 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config
closeBaseDB(tctx, db) return nil, nil, terror.WithScope(err, terror.ScopeDownstream) } - resetConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) { - err := db.CloseBaseConn(baseConn) - if err != nil { - return nil, err - } - return db.GetBaseConn(tctx.Context()) - } - conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetConnFn: resetConnFn}) + conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg}) } return db, conns, nil } diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index 2c613678be..e3d27b91ef 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -938,7 +938,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} + syncer.toDBConns = []*DBConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} syncer.reset() streamer, err := syncer.streamerProducer.generateStreamer(pos) @@ -1222,8 +1222,8 @@ func (s *testSyncerSuite) TestSharding(c *C) { syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(fromDB)} dbConn, err := db.Conn(ctx) c.Assert(err, IsNil) - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} // mock syncer.Init() function, because we need to pass mock dbs to different members' init syncer.genRouter() @@ -1233,12 +1233,12 @@ func (s *testSyncerSuite) TestSharding(c *C) { c.Assert(err, IsNil) // mock syncer.shardGroupkeeper.Init() function - syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}}} + syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}, nil}} syncer.sgk.prepare() syncer.initShardingGroups() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}, nil}} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() @@ -1382,9 +1382,9 @@ func (s *testSyncerSuite) TestRun(c *C) { syncer := NewSyncer(s.cfg) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}, - {cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}}} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}, + {cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} c.Assert(syncer.Type(), Equals, pb.UnitType_Sync) syncer.columnMapping, err = cm.NewMapping(s.cfg.CaseSensitive, s.cfg.ColumnMappingRules) @@ 
-1399,7 +1399,7 @@ func (s *testSyncerSuite) TestRun(c *C) { checkPointMock.ExpectCommit() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}}} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}, nil}} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() From 8b2be191a925613a6ed9c8a539d25b9aa3fae265 Mon Sep 17 00:00:00 2001 From: luancheng Date: Fri, 11 Oct 2019 15:24:57 +0800 Subject: [PATCH 38/50] address comments --- loader/checkpoint_test.go | 2 +- pkg/conn/baseconn.go | 12 ++++++------ pkg/conn/baseconn_test.go | 2 +- pkg/conn/basedb.go | 4 ++-- syncer/checkpoint_test.go | 2 +- syncer/syncer_test.go | 18 +++++++++--------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/loader/checkpoint_test.go b/loader/checkpoint_test.go index e8ca933188..7eeb93c7c3 100644 --- a/loader/checkpoint_test.go +++ b/loader/checkpoint_test.go @@ -117,7 +117,7 @@ func (t *testCheckPointSuite) TestForDB(c *C) { c.Assert(err, IsNil) conn := conns[0] defer func() { - err := db.Close() + err = db.Close() c.Assert(err, IsNil) }() for _, cs := range cases { diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index 4a1072f692..d994e1be7d 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -66,11 +66,11 @@ type BaseConn struct { RetryStrategy retry.Strategy // for reset - ParentDB *BaseDB + parentDB *BaseDB } -// newBaseConn builds BaseConn to connect real DB -func newBaseConn(conn *sql.Conn, strategy retry.Strategy, db *BaseDB) *BaseConn { +// NewBaseConn builds BaseConn to connect real DB +func NewBaseConn(conn *sql.Conn, strategy retry.Strategy, db *BaseDB) *BaseConn { if strategy == nil { strategy = &retry.FiniteRetryStrategy{} } @@ -79,11 +79,11 @@ func newBaseConn(conn *sql.Conn, strategy retry.Strategy, db *BaseDB) *BaseConn // Reset resets this BaseConn, close old one and generate new *sql.Conn from its BaseDB func (conn *BaseConn) Reset(tctx *tcontext.Context) error { - err := conn.ParentDB.CloseBaseConn(conn) + err := conn.parentDB.CloseBaseConn(conn) if err != nil { - return err + tctx.L().Warn("close connection during reset", log.ShortError(err)) } - baseConn, err := conn.ParentDB.GetBaseConn(tctx.Context()) + baseConn, err := conn.parentDB.GetBaseConn(tctx.Context()) if err != nil { return err } diff --git a/pkg/conn/baseconn_test.go b/pkg/conn/baseconn_test.go index 1084a56ef5..070f971a68 100644 --- a/pkg/conn/baseconn_test.go +++ b/pkg/conn/baseconn_test.go @@ -35,7 +35,7 @@ type testBaseConnSuite struct { } func (t *testBaseConnSuite) TestBaseConn(c *C) { - baseConn := newBaseConn(nil, nil, nil) + baseConn := NewBaseConn(nil, nil, nil) tctx := tcontext.Background() err := baseConn.SetRetryStrategy(nil) diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go index 7257e28d1a..aae9cde6d7 100644 --- a/pkg/conn/basedb.go +++ b/pkg/conn/basedb.go @@ -91,7 +91,7 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) { if err != nil { return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError) } - baseConn := newBaseConn(conn, d.Retry, d) + baseConn := NewBaseConn(conn, d.Retry, d) d.mu.Lock() defer d.mu.Unlock() d.conns[baseConn] = struct{}{} @@ -106,7 +106,7 @@ func (d *BaseDB) CloseBaseConn(conn *BaseConn) error { return conn.close() } -// Close release ParentDB resource +// Close release *BaseDB resource func (d *BaseDB) 
Close() error { if d == nil || d.DB == nil { return nil diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go index 9145483dbf..0cb6518745 100644 --- a/syncer/checkpoint_test.go +++ b/syncer/checkpoint_test.go @@ -92,7 +92,7 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) { dbConn, err := db.Conn(tcontext.Background().Context()) c.Assert(err, IsNil) - conn := &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} + conn := &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} cp.(*RemoteCheckPoint).dbConn = conn err = cp.(*RemoteCheckPoint).prepare() diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index e3d27b91ef..e29c84ca2e 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -938,7 +938,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} + syncer.toDBConns = []*DBConn{{baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} syncer.reset() streamer, err := syncer.streamerProducer.generateStreamer(pos) @@ -1222,8 +1222,8 @@ func (s *testSyncerSuite) TestSharding(c *C) { syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(fromDB)} dbConn, err := db.Conn(ctx) c.Assert(err, IsNil) - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} // mock syncer.Init() function, because we need to pass mock dbs to different members' init syncer.genRouter() @@ -1233,12 +1233,12 @@ func (s *testSyncerSuite) TestSharding(c *C) { c.Assert(err, IsNil) // mock syncer.shardGroupkeeper.Init() function - syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{shardGroupDBConn, &retry.FiniteRetryStrategy{}, nil}} + syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(shardGroupDBConn, &retry.FiniteRetryStrategy{}, nil)} syncer.sgk.prepare() syncer.initShardingGroups() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}, nil}} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{}, nil)} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() @@ -1382,9 +1382,9 @@ func (s *testSyncerSuite) TestRun(c *C) { syncer := NewSyncer(s.cfg) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}, - {cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{dbConn, &retry.FiniteRetryStrategy{}, nil}} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}, + {cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} + syncer.ddlDBConn = &DBConn{cfg: 
s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} c.Assert(syncer.Type(), Equals, pb.UnitType_Sync) syncer.columnMapping, err = cm.NewMapping(s.cfg.CaseSensitive, s.cfg.ColumnMappingRules) @@ -1399,7 +1399,7 @@ func (s *testSyncerSuite) TestRun(c *C) { checkPointMock.ExpectCommit() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: &conn.BaseConn{checkPointDBConn, &retry.FiniteRetryStrategy{}, nil}} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{}, nil)} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() From eac3868f8b0c85beb81c26cc78d049140003a545 Mon Sep 17 00:00:00 2001 From: luancheng Date: Sat, 12 Oct 2019 13:22:22 +0800 Subject: [PATCH 39/50] use reset closure to reset BaseConn --- loader/db.go | 19 +++++++++++++++++-- pkg/conn/baseconn.go | 21 ++------------------- pkg/conn/baseconn_test.go | 4 ++-- pkg/conn/basedb.go | 2 +- syncer/checkpoint_test.go | 2 +- syncer/db.go | 27 +++++++++++++++++++++------ syncer/syncer_test.go | 18 +++++++++--------- 7 files changed, 53 insertions(+), 40 deletions(-) diff --git a/loader/db.go b/loader/db.go index b0101d4bc2..cf70b338ab 100644 --- a/loader/db.go +++ b/loader/db.go @@ -39,6 +39,9 @@ import ( type DBConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn + + // generate new BaseConn and close old one + resetBaseConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error) } func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { @@ -166,7 +169,12 @@ func (conn *DBConn) executeSQL(ctx *tcontext.Context, queries []string, args ... 
// resetConn resets one worker connection from the specified *BaseDB func (conn *DBConn) resetConn(tctx *tcontext.Context) error { - return conn.baseConn.Reset(tctx) + baseConn, err := conn.resetBaseConnFn(tctx, conn.baseConn) + if err != nil { + return err + } + conn.baseConn = baseConn + return nil } func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount int) (*conn.BaseDB, []*DBConn, error) { @@ -184,7 +192,14 @@ func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, workerCount } return nil, nil, terror.WithScope(err, terror.ScopeDownstream) } - conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg}) + resetBaseConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) { + err := baseDB.CloseBaseConn(baseConn) + if err != nil { + tctx.L().Warn("failed to close baseConn in reset", log.ShortError(err)) + } + return baseDB.GetBaseConn(tctx.Context()) + } + conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetBaseConnFn: resetBaseConnFn}) } return baseDB, conns, nil } diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index d994e1be7d..5eb76f1626 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -64,31 +64,14 @@ type BaseConn struct { DBConn *sql.Conn RetryStrategy retry.Strategy - - // for reset - parentDB *BaseDB } // NewBaseConn builds BaseConn to connect real DB -func NewBaseConn(conn *sql.Conn, strategy retry.Strategy, db *BaseDB) *BaseConn { +func NewBaseConn(conn *sql.Conn, strategy retry.Strategy) *BaseConn { if strategy == nil { strategy = &retry.FiniteRetryStrategy{} } - return &BaseConn{conn, strategy, db} -} - -// Reset resets this BaseConn, close old one and generate new *sql.Conn from its BaseDB -func (conn *BaseConn) Reset(tctx *tcontext.Context) error { - err := conn.parentDB.CloseBaseConn(conn) - if err != nil { - tctx.L().Warn("close connection during reset", log.ShortError(err)) - } - baseConn, err := conn.parentDB.GetBaseConn(tctx.Context()) - if err != nil { - return err - } - conn.DBConn = baseConn.DBConn - return nil + return &BaseConn{conn, strategy} } // SetRetryStrategy set retry strategy for baseConn diff --git a/pkg/conn/baseconn_test.go b/pkg/conn/baseconn_test.go index 070f971a68..dce1cbc948 100644 --- a/pkg/conn/baseconn_test.go +++ b/pkg/conn/baseconn_test.go @@ -35,7 +35,7 @@ type testBaseConnSuite struct { } func (t *testBaseConnSuite) TestBaseConn(c *C) { - baseConn := NewBaseConn(nil, nil, nil) + baseConn := NewBaseConn(nil, nil) tctx := tcontext.Background() err := baseConn.SetRetryStrategy(nil) @@ -52,7 +52,7 @@ func (t *testBaseConnSuite) TestBaseConn(c *C) { dbConn, err := db.Conn(tctx.Context()) c.Assert(err, IsNil) - baseConn = &BaseConn{dbConn, nil, nil} + baseConn = &BaseConn{dbConn, nil} err = baseConn.SetRetryStrategy(&retry.FiniteRetryStrategy{}) c.Assert(err, IsNil) diff --git a/pkg/conn/basedb.go b/pkg/conn/basedb.go index aae9cde6d7..b48c97b0b3 100644 --- a/pkg/conn/basedb.go +++ b/pkg/conn/basedb.go @@ -91,7 +91,7 @@ func (d *BaseDB) GetBaseConn(ctx context.Context) (*BaseConn, error) { if err != nil { return nil, terror.DBErrorAdapt(err, terror.ErrDBDriverError) } - baseConn := NewBaseConn(conn, d.Retry, d) + baseConn := NewBaseConn(conn, d.Retry) d.mu.Lock() defer d.mu.Unlock() d.conns[baseConn] = struct{}{} diff --git a/syncer/checkpoint_test.go b/syncer/checkpoint_test.go index 0cb6518745..58228dae20 100644 --- a/syncer/checkpoint_test.go +++ b/syncer/checkpoint_test.go @@ -92,7 +92,7 @@ func (s *testCheckpointSuite) TestCheckPoint(c *C) { dbConn, err := 
db.Conn(tcontext.Background().Context()) c.Assert(err, IsNil) - conn := &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} + conn := &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})} cp.(*RemoteCheckPoint).dbConn = conn err = cp.(*RemoteCheckPoint).prepare() diff --git a/syncer/db.go b/syncer/db.go index a506b6330a..f7021032e6 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -138,11 +138,19 @@ type DBConn struct { cfg *config.SubTaskConfig baseConn *conn.BaseConn + + // generate new BaseConn and close old one + resetBaseConnFn func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error) } // resetConn resets one worker connection from the specified *BaseDB func (conn *DBConn) resetConn(tctx *tcontext.Context) error { - return conn.baseConn.Reset(tctx) + baseConn, err := conn.resetBaseConnFn(tctx, conn.baseConn) + if err != nil { + return err + } + conn.baseConn = baseConn + return nil } func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...interface{}) (*sql.Rows, error) { @@ -252,19 +260,26 @@ func (conn *DBConn) executeSQL(tctx *tcontext.Context, queries []string, args .. func createConns(tctx *tcontext.Context, cfg *config.SubTaskConfig, dbCfg config.DBConfig, count int) (*conn.BaseDB, []*DBConn, error) { conns := make([]*DBConn, 0, count) - db, err := createBaseDB(dbCfg) + baseDB, err := createBaseDB(dbCfg) if err != nil { return nil, nil, err } for i := 0; i < count; i++ { - baseConn, err := db.GetBaseConn(tctx.Context()) + baseConn, err := baseDB.GetBaseConn(tctx.Context()) if err != nil { - closeBaseDB(tctx, db) + closeBaseDB(tctx, baseDB) return nil, nil, terror.WithScope(err, terror.ScopeDownstream) } - conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg}) + resetBaseConnFn := func(tctx *tcontext.Context, baseConn *conn.BaseConn) (*conn.BaseConn, error) { + err := baseDB.CloseBaseConn(baseConn) + if err != nil { + tctx.L().Warn("failed to close baseConn in reset", log.ShortError(err)) + } + return baseDB.GetBaseConn(tctx.Context()) + } + conns = append(conns, &DBConn{baseConn: baseConn, cfg: cfg, resetBaseConnFn: resetBaseConnFn}) } - return db, conns, nil + return baseDB, conns, nil } func getTableIndex(tctx *tcontext.Context, conn *DBConn, table *table) error { diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index e29c84ca2e..ea5c2b02b0 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -938,7 +938,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} + syncer.toDBConns = []*DBConn{{baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})}} syncer.reset() streamer, err := syncer.streamerProducer.generateStreamer(pos) @@ -1222,8 +1222,8 @@ func (s *testSyncerSuite) TestSharding(c *C) { syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(fromDB)} dbConn, err := db.Conn(ctx) c.Assert(err, IsNil) - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, 
baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})} // mock syncer.Init() function, because we need to pass mock dbs to different members' init syncer.genRouter() @@ -1233,12 +1233,12 @@ func (s *testSyncerSuite) TestSharding(c *C) { c.Assert(err, IsNil) // mock syncer.shardGroupkeeper.Init() function - syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(shardGroupDBConn, &retry.FiniteRetryStrategy{}, nil)} + syncer.sgk.dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(shardGroupDBConn, &retry.FiniteRetryStrategy{})} syncer.sgk.prepare() syncer.initShardingGroups() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{}, nil)} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{})} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() @@ -1382,9 +1382,9 @@ func (s *testSyncerSuite) TestRun(c *C) { syncer := NewSyncer(s.cfg) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} - syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}, - {cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)}} - syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{}, nil)} + syncer.toDBConns = []*DBConn{{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})}, + {cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})}} + syncer.ddlDBConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})} c.Assert(syncer.Type(), Equals, pb.UnitType_Sync) syncer.columnMapping, err = cm.NewMapping(s.cfg.CaseSensitive, s.cfg.ColumnMappingRules) @@ -1399,7 +1399,7 @@ func (s *testSyncerSuite) TestRun(c *C) { checkPointMock.ExpectCommit() // mock syncer.checkpoint.Init() function - syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{}, nil)} + syncer.checkpoint.(*RemoteCheckPoint).dbConn = &DBConn{cfg: s.cfg, baseConn: conn.NewBaseConn(checkPointDBConn, &retry.FiniteRetryStrategy{})} syncer.checkpoint.(*RemoteCheckPoint).prepare() syncer.reset() From 032912b1edc2b85de3ad777beb682bbb2332d5cd Mon Sep 17 00:00:00 2001 From: luancheng Date: Sat, 12 Oct 2019 17:34:09 +0800 Subject: [PATCH 40/50] address comment --- loader/db.go | 11 +++++++++++ syncer/db.go | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/loader/db.go b/loader/db.go index cf70b338ab..989b457ca1 100644 --- a/loader/db.go +++ b/loader/db.go @@ -54,6 +54,17 @@ func (conn *DBConn) querySQL(ctx *tcontext.Context, query string, args ...interf FirstRetryDuration: time.Second, BackoffStrategy: retry.Stable, IsRetryableFn: func(retryTime int, err error) bool { + if retry.IsConnectionError(err) { + err = conn.resetConn(ctx) + if err != nil { + ctx.L().Error("reset connection failed", zap.Int("retry", retryTime), + zap.String("query", utils.TruncateInterface(query, -1)), + zap.String("arguments", utils.TruncateInterface(args, -1)), + log.ShortError(err)) + return false + } + return true + } if retry.IsRetryableError(err) { ctx.L().Warn("query statement", zap.Int("retry", retryTime), zap.String("query", utils.TruncateString(query, -1)), diff --git a/syncer/db.go b/syncer/db.go index f7021032e6..de06df2303 
100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -162,6 +162,18 @@ func (conn *DBConn) querySQL(tctx *tcontext.Context, query string, args ...inter FirstRetryDuration: retryTimeout, BackoffStrategy: retry.Stable, IsRetryableFn: func(retryTime int, err error) bool { + if retry.IsConnectionError(err) { + err = conn.resetConn(tctx) + if err != nil { + tctx.L().Error("reset connection failed", zap.Int("retry", retryTime), + zap.String("query", utils.TruncateInterface(query, -1)), + zap.String("arguments", utils.TruncateInterface(args, -1)), + log.ShortError(err)) + return false + } + sqlRetriesTotal.WithLabelValues("query", conn.cfg.Name).Add(1) + return true + } if retry.IsRetryableError(err) { tctx.L().Warn("query statement", zap.Int("retry", retryTime), zap.String("query", utils.TruncateString(query, -1)), From 1b9e35175ec00813ac73298de9559b20b04c220d Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 14 Oct 2019 16:07:48 +0800 Subject: [PATCH 41/50] address comments --- dm/config/subtask.go | 11 ++++++++--- loader/loader.go | 9 ++++++++- syncer/checkpoint.go | 2 +- syncer/ghost.go | 5 +++++ syncer/online_ddl.go | 9 ++++++++- syncer/pt_osc.go | 5 +++++ syncer/sharding_group.go | 2 +- syncer/syncer.go | 22 ++++++++++++++++------ syncer/syncer_test.go | 1 + 9 files changed, 53 insertions(+), 13 deletions(-) diff --git a/dm/config/subtask.go b/dm/config/subtask.go index fb8eef3e50..e990ee22b3 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -53,14 +53,19 @@ type RawDBConfig struct { } // DefaultRawDBConfig returns a default raw database config -func DefaultRawDBConfig(readTimeout string) *RawDBConfig { +func DefaultRawDBConfig() *RawDBConfig { return &RawDBConfig{ - ReadTimeout: readTimeout, MaxIdleConns: defaultMaxIdleConns, } } -// SetWriteTimeout adds writeTimeout for raw database +// SetReadTimeout set readTimeout for raw database +func (c *RawDBConfig) SetReadTimeout(readTimeout string) *RawDBConfig { + c.ReadTimeout = readTimeout + return c +} + +// SetWriteTimeout set writeTimeout for raw database func (c *RawDBConfig) SetWriteTimeout(writeTimeout string) *RawDBConfig { c.WriteTimeout = writeTimeout return c diff --git a/loader/loader.go b/loader/loader.go index 6e2c5e87a9..b277668772 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -46,6 +46,8 @@ import ( var ( jobCount = 1000 + + infiniteReadTimeout = "0" ) // FilePosSet represents a set in mathematics. @@ -406,6 +408,10 @@ func (l *Loader) Init() (err error) { } } + dbCfg := l.cfg.To + dbCfg.RawDBCfg = config.DefaultRawDBConfig(). 
+ SetMaxIdleConns(l.cfg.PoolSize) + l.toDB, l.toDBConns, err = createConns(l.tctx, l.cfg, l.cfg.PoolSize) if err != nil { return err @@ -1009,8 +1015,9 @@ func (l *Loader) restoreData(ctx context.Context) error { } defer l.toDB.CloseBaseConn(baseConn) dbConn := &DBConn{ - baseConn: baseConn, cfg: l.cfg, + cfg: l.cfg, + baseConn: baseConn, + resetBaseConnFn: nil, } dispatchMap := make(map[string]*fileJob) diff --git a/syncer/checkpoint.go b/syncer/checkpoint.go index 7449feabea..c4a1d7c0f0 100644 --- a/syncer/checkpoint.go +++ b/syncer/checkpoint.go @@ -231,7 +231,7 @@ func NewRemoteCheckPoint(tctx *tcontext.Context, cfg *config.SubTaskConfig, id s // Init implements CheckPoint.Init func (cp *RemoteCheckPoint) Init() error { checkPointDB := cp.cfg.To - checkPointDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) + checkPointDB.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxCheckPointTimeout) db, dbConns, err := createConns(cp.tctx, cp.cfg, checkPointDB, 1) if err != nil { return err diff --git a/syncer/ghost.go b/syncer/ghost.go index fbe0a87e89..9dcd26d4ce 100644 --- a/syncer/ghost.go +++ b/syncer/ghost.go @@ -175,3 +175,8 @@ func (g *Ghost) Clear() error { func (g *Ghost) Close() { g.storge.Close() } + +// ResetConn implements interface +func (g *Ghost) ResetConn() error { + return g.storge.ResetConn() +} diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go index a1273c9d56..262c8a868a 100644 --- a/syncer/online_ddl.go +++ b/syncer/online_ddl.go @@ -51,6 +51,8 @@ type OnlinePlugin interface { TableType(table string) TableType // RealName returns real table name that removed ghost suffix and handled by table router RealName(schema, table string) (string, string) + // ResetConn resets the db connection + ResetConn() error // Clear clears all online information Clear() error // Close closes online ddl plugin @@ -109,7 +111,7 @@ func NewOnlineDDLStorage(newtctx *tcontext.Context, cfg *config.SubTaskConfig) * // Init initializes the online handler func (s *OnlineDDLStorage) Init() error { onlineDB := s.cfg.To - onlineDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) + onlineDB.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxCheckPointTimeout) db, dbConns, err := createConns(s.tctx, s.cfg, onlineDB, 1) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) @@ -250,6 +252,11 @@ func (s *OnlineDDLStorage) Clear() error { return nil } +// ResetConn implements CheckPoint.ResetConn +func (s *OnlineDDLStorage) ResetConn() error { + return s.dbConn.resetConn(s.tctx) +} + // Close closes database connection func (s *OnlineDDLStorage) Close() { s.Lock() diff --git a/syncer/pt_osc.go b/syncer/pt_osc.go index 3837e5da20..ba681aa1fc 100644 --- a/syncer/pt_osc.go +++ b/syncer/pt_osc.go @@ -175,3 +175,8 @@ func (p *PT) Clear() error { func (p *PT) Close() { p.storge.Close() } + +// ResetConn implements interface +func (p *PT) ResetConn() error { + return p.storge.ResetConn() +} diff --git a/syncer/sharding_group.go b/syncer/sharding_group.go index a95b43ef85..0f4f1281a7 100644 --- a/syncer/sharding_group.go +++ b/syncer/sharding_group.go @@ -457,7 +457,7 @@ func (k *ShardingGroupKeeper) AddGroup(targetSchema, targetTable string, sourceI func (k *ShardingGroupKeeper) Init() error { k.clear() sgkDB := k.cfg.To - sgkDB.RawDBCfg = config.DefaultRawDBConfig(maxCheckPointTimeout) + sgkDB.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxCheckPointTimeout) db, dbConns, err := createConns(k.tctx, k.cfg, sgkDB, 1) if err != nil { return err diff --git a/syncer/syncer.go 
b/syncer/syncer.go index 5c2153f954..395835c5ae 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -516,6 +516,16 @@ func (s *Syncer) resetDBs() error { } } + err = s.onlineDDL.ResetConn() + if err != nil { + return terror.WithScope(err, terror.ScopeDownstream) + } + + err = s.sgk.dbConn.resetConn(s.tctx) + if err != nil { + return terror.WithScope(err, terror.ScopeDownstream) + } + err = s.ddlDBConn.resetConn(s.tctx) if err != nil { return terror.WithScope(err, terror.ScopeDownstream) @@ -665,8 +675,7 @@ func (s *Syncer) getTable(schema string, table string) (*table, []string, error) return value, s.cacheColumns[key], nil } - db := s.toDBConns[len(s.toDBConns)-1] - t, err := s.getTableFromDB(db, schema, table) + t, err := s.getTableFromDB(s.ddlDBConn, schema, table) if err != nil { return nil, nil, err } @@ -2024,14 +2033,15 @@ func (s *Syncer) printStatus(ctx context.Context) { func (s *Syncer) createDBs() error { var err error dbCfg := s.cfg.From - dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout) + dbCfg.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxDMLConnectionTimeout) s.fromDB, err = createUpStreamConn(dbCfg) if err != nil { return err } dbCfg = s.cfg.To - dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout). + dbCfg.RawDBCfg = config.DefaultRawDBConfig(). + SetReadTimeout(maxDMLConnectionTimeout). SetMaxIdleConns(s.cfg.WorkerCount) s.toDB, s.toDBConns, err = createConns(s.tctx, s.cfg, dbCfg, s.cfg.WorkerCount) @@ -2041,7 +2051,7 @@ func (s *Syncer) createDBs() error { } // baseConn for ddl dbCfg = s.cfg.To - dbCfg.RawDBCfg = config.DefaultRawDBConfig(maxDDLConnectionTimeout) + dbCfg.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxDDLConnectionTimeout) var ddlDBConns []*DBConn s.ddlDB, ddlDBConns, err = createConns(s.tctx, s.cfg, dbCfg, 1) @@ -2416,7 +2426,7 @@ func (s *Syncer) UpdateFromConfig(cfg *config.SubTaskConfig) error { s.cfg.From = cfg.From var err error - s.cfg.From.RawDBCfg = config.DefaultRawDBConfig(maxDMLConnectionTimeout) + s.cfg.From.RawDBCfg = config.DefaultRawDBConfig().SetReadTimeout(maxDMLConnectionTimeout) s.fromDB, err = createUpStreamConn(s.cfg.From) if err != nil { s.tctx.L().Error("fail to create baseConn connection", log.ShortError(err)) diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index ea5c2b02b0..2f0210abb1 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -938,6 +938,7 @@ func (s *testSyncerSuite) TestGeneratedColumn(c *C) { dbConn, err := db.Conn(context.Background()) c.Assert(err, IsNil) syncer.fromDB = &UpStreamConn{BaseDB: conn.NewBaseDB(db)} + syncer.ddlDBConn = &DBConn{baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})} syncer.toDBConns = []*DBConn{{baseConn: conn.NewBaseConn(dbConn, &retry.FiniteRetryStrategy{})}} syncer.reset() From 92459f46045aa354a41fed07e85d9c29a8399f64 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 14 Oct 2019 16:20:20 +0800 Subject: [PATCH 42/50] fix test --- loader/loader.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/loader/loader.go b/loader/loader.go index b277668772..dd1c246282 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -46,8 +46,6 @@ import ( var ( jobCount = 1000 - - infiniteReadTimeout = "0" ) // FilePosSet represents a set in mathematics. 
@@ -1015,8 +1013,8 @@ func (l *Loader) restoreData(ctx context.Context) error { } defer l.toDB.CloseBaseConn(baseConn) dbConn := &DBConn{ - cfg: l.cfg, - baseConn: baseConn, + cfg: l.cfg, + baseConn: baseConn, resetBaseConnFn: nil, } From ad11983a6871af28495235acf2e393eea5643589 Mon Sep 17 00:00:00 2001 From: luancheng Date: Mon, 14 Oct 2019 17:51:50 +0800 Subject: [PATCH 43/50] fix test --- syncer/syncer.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/syncer/syncer.go b/syncer/syncer.go index 395835c5ae..18100c6363 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -516,14 +516,18 @@ func (s *Syncer) resetDBs() error { } } - err = s.onlineDDL.ResetConn() - if err != nil { - return terror.WithScope(err, terror.ScopeDownstream) + if s.onlineDDL != nil { + err = s.onlineDDL.ResetConn() + if err != nil { + return terror.WithScope(err, terror.ScopeDownstream) + } } - err = s.sgk.dbConn.resetConn(s.tctx) - if err != nil { - return terror.WithScope(err, terror.ScopeDownstream) + if s.sgk != nil { + err = s.sgk.dbConn.resetConn(s.tctx) + if err != nil { + return terror.WithScope(err, terror.ScopeDownstream) + } } err = s.ddlDBConn.resetConn(s.tctx) From 35376e75ddfce73d16a6b40ae1697f27095fa6a7 Mon Sep 17 00:00:00 2001 From: luancheng Date: Tue, 15 Oct 2019 10:58:49 +0800 Subject: [PATCH 44/50] address comment --- dm/config/subtask.go | 6 +++--- syncer/online_ddl.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dm/config/subtask.go b/dm/config/subtask.go index e990ee22b3..bb3a0a03cb 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -59,19 +59,19 @@ func DefaultRawDBConfig() *RawDBConfig { } } -// SetReadTimeout set readTimeout for raw database +// SetReadTimeout set readTimeout for raw database config func (c *RawDBConfig) SetReadTimeout(readTimeout string) *RawDBConfig { c.ReadTimeout = readTimeout return c } -// SetWriteTimeout set writeTimeout for raw database +// SetWriteTimeout set writeTimeout for raw database config func (c *RawDBConfig) SetWriteTimeout(writeTimeout string) *RawDBConfig { c.WriteTimeout = writeTimeout return c } -// SetMaxIdleConns set maxIdleConns for raw database +// SetMaxIdleConns set maxIdleConns for raw database config // set value <= 0 then no idle connections are retained. // set value > 0 then `value` idle connections are retained. 
func (c *RawDBConfig) SetMaxIdleConns(value int) *RawDBConfig { diff --git a/syncer/online_ddl.go b/syncer/online_ddl.go index 262c8a868a..c5f93c4bf2 100644 --- a/syncer/online_ddl.go +++ b/syncer/online_ddl.go @@ -252,7 +252,7 @@ func (s *OnlineDDLStorage) Clear() error { return nil } -// ResetConn implements CheckPoint.ResetConn +// ResetConn implements OnlinePlugin.ResetConn func (s *OnlineDDLStorage) ResetConn() error { return s.dbConn.resetConn(s.tctx) } From 26190cc4ac2522663c9e7547dd969789087b4f2e Mon Sep 17 00:00:00 2001 From: luancheng Date: Tue, 15 Oct 2019 12:13:21 +0800 Subject: [PATCH 45/50] address comment --- loader/loader.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/loader/loader.go b/loader/loader.go index dd1c246282..68efbf6186 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -1015,7 +1015,9 @@ func (l *Loader) restoreData(ctx context.Context) error { dbConn := &DBConn{ cfg: l.cfg, baseConn: baseConn, - resetBaseConnFn: nil, + resetBaseConnFn: func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error) { + return nil, terror.ErrDBBadConn.Generate("bad connection error restoreData") + }, } dispatchMap := make(map[string]*fileJob) From b8f4a924fcc6238933921364dea277c8492852a8 Mon Sep 17 00:00:00 2001 From: luancheng Date: Tue, 15 Oct 2019 13:05:24 +0800 Subject: [PATCH 46/50] fix test --- loader/loader.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loader/loader.go b/loader/loader.go index 68efbf6186..2f226f9cd2 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -1013,8 +1013,8 @@ func (l *Loader) restoreData(ctx context.Context) error { } defer l.toDB.CloseBaseConn(baseConn) dbConn := &DBConn{ - cfg: l.cfg, - baseConn: baseConn, + cfg: l.cfg, + baseConn: baseConn, resetBaseConnFn: func(*tcontext.Context, *conn.BaseConn) (*conn.BaseConn, error) { return nil, terror.ErrDBBadConn.Generate("bad connection error restoreData") }, From b8afd4d479efdf812569482efb9255abe9e32298 Mon Sep 17 00:00:00 2001 From: luancheng Date: Wed, 16 Oct 2019 11:47:12 +0800 Subject: [PATCH 47/50] return commit err index --- pkg/conn/baseconn.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index 5eb76f1626..91df5423ca 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -161,7 +161,7 @@ func (conn *BaseConn) ExecuteSQLWithIgnoreError(tctx *tcontext.Context, ignoreEr } err = txn.Commit() if err != nil { - return l, terror.ErrDBExecuteFailed.Delegate(err, "commit") + return l - 1, terror.ErrDBExecuteFailed.Delegate(err, "commit") } return l, nil } From 0a62846ecaa18bf68f895ec1ce026b1d799dbee3 Mon Sep 17 00:00:00 2001 From: luancheng Date: Thu, 17 Oct 2019 13:13:50 +0800 Subject: [PATCH 48/50] fix test --- syncer/db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncer/db.go b/syncer/db.go index c105e29a42..7031366913 100644 --- a/syncer/db.go +++ b/syncer/db.go @@ -98,7 +98,7 @@ type UpStreamConn struct { } func (conn *UpStreamConn) getMasterStatus(flavor string) (mysql.Position, gtid.Set, error) { - return utils.GetMasterStatus(conn.BaseDB.DB, flavor) + pos, gtidSet, err := utils.GetMasterStatus(conn.BaseDB.DB, flavor) failpoint.Inject("GetMasterStatusFailed", func(val failpoint.Value) { err = tmysql.NewErr(uint16(val.(int))) From 8ac2bf1ad83e67fbec7212eb169556999f0bad1b Mon Sep 17 00:00:00 2001 From: luancheng Date: Fri, 18 Oct 2019 13:09:41 +0800 Subject: [PATCH 49/50] address comments --- pkg/conn/baseconn.go | 20 ++++++++++---------- 
syncer/syncer.go | 3 +-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index 5f6c3320b7..b3baddcc3d 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -39,24 +39,24 @@ import ( // Syncer: // Worker Connection: // DML connection: -// execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections -// DDL Connection: -// execute some DDL on Downstream DB, one unit has one connection +// execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections +// DDL Connection: +// execute some DDL on Downstream DB, one unit has one connection // CheckPoint Connection: -// interact with CheckPoint DB, one unit has one connection +// interact with CheckPoint DB, one unit has one connection // OnlineDDL connection: -// interact with Online DDL DB, one unit has one connection +// interact with Online DDL DB, one unit has one connection // ShardGroupKeeper connection: -// interact with ShardGroupKeeper DB, one unit has one connection +// interact with ShardGroupKeeper DB, one unit has one connection // // Loader: // Worker Connection: -// execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections +// execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections // CheckPoint Connection: -// interact with CheckPoint DB, one unit has one connection +// interact with CheckPoint DB, one unit has one connection // Restore Connection: -// only use to create schema and table in restoreData, -// it ignore already exists error and it should be removed after use, one unit has one connection +// only use to create schema and table in restoreData, +// it ignore already exists error and it should be removed after use, one unit has one connection // // each connection should have ability to retry on some common errors (e.g. tmysql.ErrTiKVServerTimeout) or maybe some specific errors in the future // and each connection also should have ability to reset itself during some specific connection error (e.g. driver.ErrBadConn) diff --git a/syncer/syncer.go b/syncer/syncer.go index adb6513e58..1ac8438a31 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -2058,13 +2058,12 @@ func (s *Syncer) createDBs() error { var ddlDBConns []*DBConn s.ddlDB, ddlDBConns, err = createConns(s.tctx, s.cfg, dbCfg, 1) - s.ddlDBConn = ddlDBConns[0] - if err != nil { closeUpstreamConn(s.tctx, s.fromDB) closeBaseDB(s.tctx, s.toDB) return err } + s.ddlDBConn = ddlDBConns[0] return nil } From aaa86654aa68427b22b60accbcc2d613be6fc362 Mon Sep 17 00:00:00 2001 From: luancheng Date: Fri, 18 Oct 2019 13:24:07 +0800 Subject: [PATCH 50/50] address comment --- pkg/conn/baseconn.go | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pkg/conn/baseconn.go b/pkg/conn/baseconn.go index b3baddcc3d..d9f9ac0e10 100644 --- a/pkg/conn/baseconn.go +++ b/pkg/conn/baseconn.go @@ -34,29 +34,29 @@ import ( // Similar to sql.Conn, every BaseConn generated from one BaseDB shares this BaseDB to reset // // Basic usage: -// For Syncer and Loader Unit, they both have different amount of connections due to config -// Currently we have some types of connections exist -// Syncer: -// Worker Connection: -// DML connection: -// execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections -// DDL Connection: -// execute some DDL on Downstream DB, one unit has one connection -// CheckPoint Connection: -// interact with CheckPoint DB, one unit has one connection -// OnlineDDL connection: -// interact with Online DDL DB, one unit has one connection -// ShardGroupKeeper connection: -// interact with ShardGroupKeeper DB, one unit has one connection +// For Syncer and Loader Unit, they both have different amount of connections due to config +// Currently we have some types of connections exist +// Syncer: +// Worker Connection: +// DML connection: +// execute some DML on Downstream DB, one unit has `syncer.WorkerCount` worker connections +// DDL Connection: +// execute some DDL on Downstream DB, one unit has one connection +// CheckPoint Connection: +// interact with CheckPoint DB, one unit has one connection +// OnlineDDL connection: +// interact with Online DDL DB, one unit has one connection +// ShardGroupKeeper connection: +// interact with ShardGroupKeeper DB, one unit has one connection // -// Loader: -// Worker Connection: -// execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections -// CheckPoint Connection: -// interact with CheckPoint DB, one unit has one connection -// Restore Connection: -// only use to create schema and table in restoreData, -// it ignore already exists error and it should be removed after use, one unit has one connection +// Loader: +// Worker Connection: +// execute some DML to Downstream DB, one unit has `loader.PoolSize` worker connections +// CheckPoint Connection: +// interact with CheckPoint DB, one unit has one connection +// Restore Connection: +// only use to create schema and table in restoreData, +// it ignore already exists error and it should be removed after use, one unit has one connection // // each connection should have ability to retry on some common errors (e.g. tmysql.ErrTiKVServerTimeout) or maybe some specific errors in the future // and each connection also should have ability to reset itself during some specific connection error (e.g. driver.ErrBadConn)
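
Three of the patterns this series settles on are easier to see outside the diffs. The first is the RawDBConfig builder that patches 41 and 44 converge on: DefaultRawDBConfig() takes no arguments, and each Set* method fills one field and returns the receiver, so call sites chain only the limits they care about. The sketch below mirrors the struct and setters from dm/config/subtask.go; the default of 2 idle connections, the "30s" timeout, and the pool size 16 are illustrative placeholders, not values from the patches.

package main

import "fmt"

// RawDBConfig mirrors the final shape from patch 44: a plain struct plus
// chainable setters.
type RawDBConfig struct {
	MaxIdleConns int
	ReadTimeout  string
	WriteTimeout string
}

// DefaultRawDBConfig returns a default config; 2 is a placeholder here, the
// real default lives in dm/config/subtask.go.
func DefaultRawDBConfig() *RawDBConfig {
	return &RawDBConfig{MaxIdleConns: 2}
}

// SetReadTimeout sets readTimeout and returns the config for chaining.
func (c *RawDBConfig) SetReadTimeout(readTimeout string) *RawDBConfig {
	c.ReadTimeout = readTimeout
	return c
}

// SetWriteTimeout sets writeTimeout and returns the config for chaining.
func (c *RawDBConfig) SetWriteTimeout(writeTimeout string) *RawDBConfig {
	c.WriteTimeout = writeTimeout
	return c
}

// SetMaxIdleConns sets maxIdleConns; a value <= 0 retains no idle connections.
func (c *RawDBConfig) SetMaxIdleConns(value int) *RawDBConfig {
	c.MaxIdleConns = value
	return c
}

func main() {
	// Shaped like the syncer's DML pool in createDBs: a bounded read timeout
	// plus one idle connection per worker.
	cfg := DefaultRawDBConfig().SetReadTimeout("30s").SetMaxIdleConns(16)
	fmt.Printf("%+v\n", cfg)
}

Returning the receiver from every setter is what lets call sites such as syncer/checkpoint.go collapse their setup into a single expression like config.DefaultRawDBConfig().SetReadTimeout(maxCheckPointTimeout).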
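
The second pattern is the connection reset that patches 37 through 39 iterate on: first a Reset method on BaseConn holding a ParentDB back-pointer, then the closure that survives review, where only createConns ever sees the shared pool. The sketch below re-creates the closure with plain database/sql types standing in for BaseDB and BaseConn; the names follow the patches, but the trimmed-down types are illustrative rather than DM's real API.

// Package dbconn sketches the closure-based reset from patch 39.
package dbconn

import (
	"context"
	"database/sql"
	"log"
)

// DBConn owns one pinned connection plus a closure that can replace it;
// keeping the pool out of the struct means a DBConn can only do to the pool
// whatever its closure allows.
type DBConn struct {
	baseConn        *sql.Conn
	resetBaseConnFn func(context.Context, *sql.Conn) (*sql.Conn, error)
}

// resetConn swaps in a fresh connection, as loader/db.go and syncer/db.go do
// once retry.IsConnectionError has flagged the old one.
func (c *DBConn) resetConn(ctx context.Context) error {
	baseConn, err := c.resetBaseConnFn(ctx, c.baseConn)
	if err != nil {
		return err
	}
	c.baseConn = baseConn
	return nil
}

// createConn shows how createConns builds the closure over the shared pool.
func createConn(ctx context.Context, db *sql.DB) (*DBConn, error) {
	baseConn, err := db.Conn(ctx)
	if err != nil {
		return nil, err
	}
	resetBaseConnFn := func(ctx context.Context, old *sql.Conn) (*sql.Conn, error) {
		// Closing a broken connection may itself fail; like the patch,
		// log the failure and still ask the pool for a replacement.
		if cerr := old.Close(); cerr != nil {
			log.Printf("failed to close baseConn in reset: %v", cerr)
		}
		return db.Conn(ctx)
	}
	return &DBConn{baseConn: baseConn, resetBaseConnFn: resetBaseConnFn}, nil
}

The indirection is also what patch 45 exploits in restoreData: the one-shot restore connection gets a resetBaseConnFn that always returns terror.ErrDBBadConn, so it can never be silently recycled.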
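
The third pattern is the capture-then-inject rewrite in patch 48: failpoint.Inject can only override results that are first bound to named variables, so a direct return utils.GetMasterStatus(...) gives the failpoint nothing to hook. A minimal sketch under stated assumptions follows: getStatus stands in for utils.GetMasterStatus, and the injected error is illustrative, where the real patch builds a tmysql error from the failpoint value.

// Package status sketches the capture-then-inject pattern from patch 48.
package status

import (
	"fmt"

	"github.com/pingcap/failpoint"
)

// getStatus is a stand-in for utils.GetMasterStatus.
func getStatus() (int, error) {
	return 42, nil
}

// getMasterStatus binds the results to locals before returning, so the
// failpoint body can overwrite err; with `return getStatus()` the marker
// would never see the error value at all. Inject is a no-op marker until
// failpoint-ctl rewrites it for a testing build.
func getMasterStatus() (int, error) {
	pos, err := getStatus()
	failpoint.Inject("GetMasterStatusFailed", func(val failpoint.Value) {
		err = fmt.Errorf("injected GetMasterStatus error: %v", val)
	})
	return pos, err
}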