diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 41a385a26..7f64aa172 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -35,7 +35,7 @@ Related changes - Need to cherry-pick to the release branch - Need to update the documentation -### Release Note +### Release note - diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bc8fb20fa..472def46a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,10 +4,28 @@ on: branches: - master - 'release-[0-9].[0-9]*' + paths-ignore: + - '**.html' + - '**.md' + - 'CNAME' + - 'LICENSE' + - 'docs/**' + - 'tests/**' + - 'docker/**' + - '.github/workflows/**.yml' pull_request: branches: - master - 'release-[0-9].[0-9]*' + paths-ignore: + - '**.html' + - '**.md' + - 'CNAME' + - 'LICENSE' + - 'docs/**' + - 'tests/**' + - 'docker/**' + - '.github/workflows/**.yml' jobs: compile: @@ -30,7 +48,21 @@ jobs: - name: Set up Go uses: actions/setup-go@v2 with: - go-version: 1.15 + go-version: 1.16 - name: Run build run: make build + + compile-freebsd: + name: Compile for FreeBSD job + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2.1.0 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.16 + + - name: Compile for FreeBSD + run: GOOS=freebsd make build diff --git a/.github/workflows/compatible_test.yml b/.github/workflows/compatible_test.yml index fce0a6a11..9fbf0c52e 100644 --- a/.github/workflows/compatible_test.yml +++ b/.github/workflows/compatible_test.yml @@ -5,13 +5,28 @@ on: branches: - master - 'release-[0-9].[0-9]*' + paths-ignore: + - '**.html' + - '**.md' + - 'CNAME' + - 'LICENSE' + - 'docs/**' + - 'tests/**' + - 'docker/**' + - '.github/workflows/**.yml' pull_request: branches: - master - 'release-[0-9].[0-9]*' - issue_comment: - types: - - created + paths-ignore: + - '**.html' + - '**.md' + - 'CNAME' + - 'LICENSE' + - 'docs/**' + - 'tests/**' + - 'docker/**' + - '.github/workflows/**.yml' jobs: build: @@ -19,32 +34,23 @@ jobs: timeout-minutes: 25 steps: - - uses: khan/pull-request-comment-trigger@master - id: check - with: - trigger: '/run-compatiblility-tests' - - name: Free disk space run: | sudo rm -rf /usr/local/lib/android sudo rm -rf /usr/share/dotnet docker volume prune -f docker image prune -f - if: ${{ github.event_name == 'pull_request' || steps.check.outputs.triggered == 'true' }} - uses: actions/checkout@v2 - if: ${{ github.event_name == 'pull_request' || steps.check.outputs.triggered == 'true' }} - name: Set up Go uses: actions/setup-go@v2 with: - go-version: 1.15 - if: ${{ github.event_name == 'pull_request' || steps.check.outputs.triggered == 'true' }} + go-version: 1.16 - name: Generate compatibility test backup data timeout-minutes: 15 run: sh compatibility/prepare_backup.sh - if: ${{ github.event_name == 'pull_request' || steps.check.outputs.triggered == 'true' }} - name: Start server run: | @@ -52,7 +58,6 @@ jobs: TAG=nightly PORT_SUFFIX=1 docker-compose -f compatibility/backup_cluster.yaml build TAG=nightly PORT_SUFFIX=1 docker-compose -f compatibility/backup_cluster.yaml up --remove-orphans -d TAG=nightly PORT_SUFFIX=1 docker-compose -f compatibility/backup_cluster.yaml exec -T control make compatibility_test - if: ${{ github.event_name == 'pull_request' || steps.check.outputs.triggered == 'true' }} - name: Collect component log if: ${{ failure() }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f6ab9373d..1f2846471 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ 
-14,7 +14,7 @@ your contribution accepted. Developing BR requires: -* [Go 1.13+](http://golang.org/doc/code.html) +* [Go 1.16+](http://golang.org/doc/code.html) * An internet connection to download the dependencies Simply run `make` to build the program. diff --git a/Makefile b/Makefile index 468adeb6d..8faf1a548 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,13 @@ ifeq ("$(WITH_RACE)", "1") GOBUILD = CGO_ENABLED=1 GO111MODULE=on $(GO) build -ldflags '$(LDFLAGS)' endif +# There is no FreeBSD environment for GitHub Actions, so cross-compile on Linux. +# Cross-compilation doesn't work with CGO_ENABLED=1, so disable cgo. The reason to have +# cgo enabled on regular builds is performance. +ifeq ("$(GOOS)", "freebsd") + GOBUILD = CGO_ENABLED=0 GO111MODULE=on go build -trimpath -ldflags '$(LDFLAGS)' +endif + all: build check test prepare: diff --git a/README.md b/README.md index 2e60801ab..8ae005489 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ $ make $ make test ``` -Notice BR supports building with Go version `Go >= 1.13` +Notice BR supports building with Go version `Go >= 1.16` When BR is built successfully, you can find binary in the `bin` directory. @@ -108,15 +108,15 @@ bin/br backup table --db test \ -s local:///tmp/backup_test/ \ --pd ${PD_ADDR}:2379 \ --log-file backup_test.log \ - + # Let's drop the table. mysql -uroot --host 127.0.0.1 -P4000 -E -e "USE test; DROP TABLE order_line; show tables" -u root -p # Restore from the backup. bin/br restore table --db test \ - --table order_line \ - -s local:///tmp/backup_test/ \ - --pd ${PD_ADDR}:2379 \ + --table order_line \ + -s local:///tmp/backup_test/ \ + --pd ${PD_ADDR}:2379 \ --log-file restore_test.log # How many rows do we get after restore? Expected to be 300242 rows. diff --git a/cmd/br/cmd.go b/cmd/br/cmd.go index 54a89ff10..96014dae7 100644 --- a/cmd/br/cmd.go +++ b/cmd/br/cmd.go @@ -15,7 +15,6 @@ import ( "github.com/pingcap/log" tidbutils "github.com/pingcap/tidb-tools/pkg/utils" "github.com/pingcap/tidb/util/logutil" - "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/pingcap/br/pkg/gluetidb" @@ -101,6 +100,27 @@ func AddFlags(cmd *cobra.Command) { // Init initializes BR cli. func Init(cmd *cobra.Command) (err error) { initOnce.Do(func() { + slowLogFilename, e := cmd.Flags().GetString(FlagSlowLogFile) + if e != nil { + err = e + return + } + tidbLogCfg := logutil.LogConfig{} + if len(slowLogFilename) != 0 { + tidbLogCfg.SlowQueryFile = slowLogFilename + // Set a log file just for the special grpc log, + // otherwise the info will be printed to stdout... + tidbLogCfg.File.Filename = timestampLogFileName() + } else { + // Disable annoying TiDB Log. + // TODO: some error logs are output randomly; we need to fix them in TiDB. + tidbLogCfg.Level = "fatal" + } + e = logutil.InitLogger(&tidbLogCfg) + if e != nil { + err = e + return + } // Initialize the logger. conf := new(log.Config) conf.Level, err = cmd.Flags().GetString(FlagLogLevel) @@ -144,28 +164,6 @@ func Init(cmd *cobra.Command) (err error) { return } redact.InitRedact(redactLog || redactInfoLog) - - slowLogFilename, e := cmd.Flags().GetString(FlagSlowLogFile) - if e != nil { - err = e - return - } - tidbLogCfg := logutil.LogConfig{} - if len(slowLogFilename) != 0 { - tidbLogCfg.SlowQueryFile = slowLogFilename - } else { - // Hack! Discard slow log by setting log level to PanicLevel - logutil.SlowQueryLogger.SetLevel(logrus.PanicLevel) - // Disable annoying TiDB Log. - // TODO: some error logs outputs randomly, we need to fix them in TiDB.
- tidbLogCfg.Level = "fatal" - } - e = logutil.InitLogger(&tidbLogCfg) - if e != nil { - err = e - return - } - err = startPProf(cmd) }) return errors.Trace(err) diff --git a/cmd/br/restore.go b/cmd/br/restore.go index 498be61f1..43a9eb563 100644 --- a/cmd/br/restore.go +++ b/cmd/br/restore.go @@ -127,7 +127,7 @@ func newFullRestoreCommand() *cobra.Command { func newDBRestoreCommand() *cobra.Command { command := &cobra.Command{ Use: "db", - Short: "restore tables in a database", + Short: "restore tables in a database from the backup data", Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, _ []string) error { return runRestoreCommand(cmd, "Database restore") @@ -140,7 +140,7 @@ func newDBRestoreCommand() *cobra.Command { func newTableRestoreCommand() *cobra.Command { command := &cobra.Command{ Use: "table", - Short: "restore a table", + Short: "restore a table from the backup data", Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, _ []string) error { return runRestoreCommand(cmd, "Table restore") diff --git a/compatibility/get_last_tags.sh b/compatibility/get_last_tags.sh new file mode 100644 index 000000000..9da09f9aa --- /dev/null +++ b/compatibility/get_last_tags.sh @@ -0,0 +1,20 @@ +# update tags +git fetch --tags + +getLatestTags() { + release_5_branch_regex="^release-5\.[0-9].*$" + release_4_branch_regex="^release-4\.[0-9].*$" + TOTAL_TAGS=$(git for-each-ref --sort=creatordate refs/tags | awk -F '/' '{print $3}') + filter='alpha' + # latest tags + TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | grep -v $filter | tail -n3) + if git rev-parse --abbrev-ref HEAD | egrep -q $release_5_branch_regex + then + # If we are in a release-5.0 branch, try to use the latest 3 versions of 5.x and the last 4.x version + TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v4." | grep -v $filter | tail -n1 && echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v5." | grep -v $filter | tail -n3) + elif git rev-parse --abbrev-ref HEAD | egrep -q $release_4_branch_regex + then + # If we are in a release-4.0 branch, try to use the latest 3 versions of 4.x + TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v4." | grep -v $filter | tail -n3) + fi +} \ No newline at end of file diff --git a/compatibility/prepare_backup.sh b/compatibility/prepare_backup.sh index 73c8df466..56eab1949 100644 --- a/compatibility/prepare_backup.sh +++ b/compatibility/prepare_backup.sh @@ -15,27 +15,9 @@ set -eux -# update tags -git fetch --tags +. compatibility/get_last_tags.sh TAGS="v5.0.0" -getLatestTags() { - release_5_branch_regex="^release-5\.[0-9].*$" - release_4_branch_regex="^release-4\.[0-9].*$" - TOTAL_TAGS=$(git for-each-ref --sort=creatordate refs/tags | awk -F '/' '{print $3}') - # latest tags - TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | tail -n3) - if git rev-parse --abbrev-ref HEAD | egrep -q $release_5_branch_regex - then - # If we are in release-5.0 branch, try to use latest 3 version of 5.x and last 4.x version - TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v4." | tail -n1 && echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v5." | tail -n3) - elif git rev-parse --abbrev-ref HEAD | egrep -q $release_4_branch_regex - then - # If we are in release-4.0 branch, try to use latest 3 version of 4.x - TAGS=$(echo $TOTAL_TAGS | tr ' ' '\n' | fgrep "v4."
| tail -n3) - fi -} - getLatestTags echo "recent version of cluster is $TAGS" diff --git a/docker/Dockerfile b/docker/Dockerfile index 14c577fcf..36b236fc9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ # For loading data to TiDB -FROM golang:1.13.8-buster as go-ycsb-builder +FROM golang:1.16.4-buster as go-ycsb-builder WORKDIR /go/src/github.com/pingcap/ RUN git clone https://github.com/pingcap/go-ycsb.git && \ cd go-ycsb && \ @@ -8,7 +8,7 @@ RUN git clone https://github.com/pingcap/go-ycsb.git && \ # For operating minio S3 compatible storage FROM minio/mc as mc-builder -FROM golang:1.13.8-buster +FROM golang:1.16.4-buster RUN apt-get update && apt-get install -y --no-install-recommends \ git \ diff --git a/errors.toml b/errors.toml index b90fa2da6..f6980988b 100644 --- a/errors.toml +++ b/errors.toml @@ -31,6 +31,11 @@ error = ''' invalid argument ''' +["BR:Common:ErrUndefinedDbOrTable"] +error = ''' +undefined restore databases or tables +''' + ["BR:Common:ErrUnknown"] error = ''' internal error @@ -76,11 +81,6 @@ error = ''' key not in region ''' -["BR:KV:ErrKVNotHealth"] -error = ''' -tikv cluster not health -''' - ["BR:KV:ErrKVNotLeader"] error = ''' not leader @@ -96,9 +96,14 @@ error = ''' rewrite rule not found ''' +["BR:KV:ErrKVStorage"] +error = ''' +tikv storage occur I/O error +''' + ["BR:KV:ErrKVUnknown"] error = ''' -unknown tikv error +unknown error occur on tikv ''' ["BR:KV:ErrNotTiKVStorage"] diff --git a/go.mod1 b/go.mod1 index 6fb40587e..b116f2962 100644 --- a/go.mod1 +++ b/go.mod1 @@ -1,6 +1,6 @@ module github.com/pingcap/br -go 1.13 +go 1.16 require ( cloud.google.com/go/storage v1.6.0 @@ -24,16 +24,15 @@ require ( github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712 github.com/pingcap/errors v0.11.5-0.20201126102027-b0a155152ca3 github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd - github.com/pingcap/kvproto v0.0.0-20210308063835-39b884695fb8 + github.com/pingcap/kvproto v0.0.0-20210507074444-0ec2d0dc2e4b github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4 - github.com/pingcap/parser v0.0.0-20210330190622-f959a136fc19 - github.com/pingcap/tidb v1.1.0-beta.0.20210419040752-76ba3c84acbc + github.com/pingcap/parser v0.0.0-20210525032559-c37778aff307 + github.com/pingcap/tidb v1.1.0-beta.0.20210602080826-ca3d88eba5c6 github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible - github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b + github.com/pingcap/tipb v0.0.0-20210525032549-b80be13ddf6c github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 - github.com/sirupsen/logrus v1.6.0 github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 github.com/tikv/pd v1.1.0-beta.0.20210323121136-78679e5e209d @@ -41,13 +40,13 @@ require ( github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 go.etcd.io/etcd v0.5.0-alpha.5.0.20200824191128-ae9734ed278b go.uber.org/atomic v1.7.0 - go.uber.org/multierr v1.6.0 + go.uber.org/multierr v1.7.0 go.uber.org/zap v1.16.0 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 golang.org/x/sys v0.0.0-20210324051608-47abb6519492 - golang.org/x/text v0.3.5 + golang.org/x/text v0.3.6 google.golang.org/api v0.22.0 google.golang.org/grpc v1.27.1 modernc.org/mathutil v1.2.2 diff --git a/go.sum1 b/go.sum1 index d9a44b6ce..71c7b4a43 100644 --- a/go.sum1 +++ 
b/go.sum1 @@ -97,8 +97,6 @@ github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976/go.mod h1:BbtTi github.com/cockroachdb/redact v0.0.0-20200622112456-cd282804bbd3 h1:2+dpIJzYMSbLi0587YXpi8tOJT52qCOI/1I0UNThc/I= github.com/cockroachdb/redact v0.0.0-20200622112456-cd282804bbd3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= -github.com/codahale/hdrhistogram v0.9.0 h1:9GjrtRI+mLEFPtTfR/AZhcxp+Ii8NZYWq5104FbZQY0= -github.com/codahale/hdrhistogram v0.9.0/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64 h1:W1SHiII3e0jVwvaQFglwu3kS9NLxOeTpvik7MbKCyuQ= github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64/go.mod h1:F86k/6c7aDUdwSUevnLpHS/3Q9hzYCE99jGk2xsHnt0= @@ -162,7 +160,6 @@ github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsouza/fake-gcs-server v1.17.0/go.mod h1:D1rTE4YCyHFNa99oyJJ5HyclvN/0uQR+pM/VdlL83bw= github.com/fsouza/fake-gcs-server v1.19.0 h1:XyaGOlqo+R5sjT03x2ymk0xepaQlgwhRLTT2IopW0zA= @@ -425,7 +422,7 @@ github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5e github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d/go.mod h1:lXfE4PvvTW5xOjO6Mba8zDPyw8M93B6AQ7frTGnMlA8= github.com/pingcap/badger v1.5.1-0.20200908111422-2e78ee155d19 h1:IXpGy7y9HyoShAFmzW2OPF0xCA5EOoSTyZHwsgYk9Ro= github.com/pingcap/badger v1.5.1-0.20200908111422-2e78ee155d19/go.mod h1:LyrqUOHZrUDf9oGi1yoz1+qw9ckSIhQb5eMa1acOLNQ= -github.com/pingcap/br v5.0.0-nightly.0.20210407061032-be5523713acf+incompatible/go.mod h1:ymVmo50lQydxib0tmK5hHk4oteB7hZ0IMCArunwy3UQ= +github.com/pingcap/br v5.1.0-alpha.0.20210526054934-d5f5f9df24f5+incompatible/go.mod h1:ymVmo50lQydxib0tmK5hHk4oteB7hZ0IMCArunwy3UQ= github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= github.com/pingcap/check v0.0.0-20191107115940-caf2b9e6ccf4/go.mod h1:PYMCGwN0JHjoqGr3HrZoD+b8Tgx8bKnArhSq8YVzUMc= github.com/pingcap/check v0.0.0-20191216031241-8a5a85928f12/go.mod h1:PYMCGwN0JHjoqGr3HrZoD+b8Tgx8bKnArhSq8YVzUMc= @@ -450,25 +447,26 @@ github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17Xtb github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= github.com/pingcap/kvproto v0.0.0-20210219064844-c1844a4775d6/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20210308063835-39b884695fb8 h1:t72qxPxunoKykkAuO5glpWGdoP+RmvKvX0lvmyFV0fI= -github.com/pingcap/kvproto v0.0.0-20210308063835-39b884695fb8/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20210507054410-a8152f8a876c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= 
+github.com/pingcap/kvproto v0.0.0-20210507074444-0ec2d0dc2e4b h1:e42N26QQjVA/obDrFFapJ1YLB+j5aPQOh7R+cIGR9Bk= +github.com/pingcap/kvproto v0.0.0-20210507074444-0ec2d0dc2e4b/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20201112100606-8f1e84a3abc8/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4 h1:ERrF0fTuIOnwfGbt71Ji3DKbOEaP189tjym50u8gpC8= github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= -github.com/pingcap/parser v0.0.0-20210330190622-f959a136fc19 h1:WQJbP0G8RL9v+rQV7/2UAImJcV6FU9gpJ1fzpyQuaXg= -github.com/pingcap/parser v0.0.0-20210330190622-f959a136fc19/go.mod h1:xZC8I7bug4GJ5KtHhgAikjTfU4kBv1Sbo3Pf1MZ6lVw= +github.com/pingcap/parser v0.0.0-20210525032559-c37778aff307 h1:v7SipssMu4X1tVQOe3PIVE73keJNHCFXe4Cza5uNDZ8= +github.com/pingcap/parser v0.0.0-20210525032559-c37778aff307/go.mod h1:xZC8I7bug4GJ5KtHhgAikjTfU4kBv1Sbo3Pf1MZ6lVw= github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/sysutil v0.0.0-20210315073920-cc0985d983a3 h1:A9KL9R+lWSVPH8IqUuH1QSTRJ5FGoY1bT2IcfPKsWD8= github.com/pingcap/sysutil v0.0.0-20210315073920-cc0985d983a3/go.mod h1:tckvA041UWP+NqYzrJ3fMgC/Hw9wnmQ/tUkp/JaHly8= -github.com/pingcap/tidb v1.1.0-beta.0.20210419040752-76ba3c84acbc h1:NKuYxjsKNrxiejvPIrXVYOwjCjQ+15PW9ZZzMp6stmM= -github.com/pingcap/tidb v1.1.0-beta.0.20210419040752-76ba3c84acbc/go.mod h1:OrxS2nA5mNkyPmJT9F+xPRPBYeddjwJZHYl99Zzr3B8= +github.com/pingcap/tidb v1.1.0-beta.0.20210602080826-ca3d88eba5c6 h1:c26DauMES20YYq1P1cu1tURWgWFuvMHbGEenmnpyJY8= +github.com/pingcap/tidb v1.1.0-beta.0.20210602080826-ca3d88eba5c6/go.mod h1:wDXJsUfKc+xXIuFBFY2vopJpfi1zl5EsNwbgU5k+iAQ= github.com/pingcap/tidb-dashboard v0.0.0-20210312062513-eef5d6404638/go.mod h1:OzFN8H0EDMMqeulPhPMw2i2JaiZWOKFQ7zdRPhENNgo= github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible h1:ceznmu/lLseGHP/jKyOa/3u/5H3wtLLLqkH2V3ssSjg= github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM= -github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b h1:sZHSH0mh8PcRbmZlsIqP7CEwnfFuBpmkGt5i9JStLWA= -github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo= +github.com/pingcap/tipb v0.0.0-20210525032549-b80be13ddf6c h1:El3pMBpJHuSkItkHsnBqsaaHzJwFBNDt3Aul98AhREY= +github.com/pingcap/tipb v0.0.0-20210525032549-b80be13ddf6c/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -558,8 +556,9 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.6.1 
h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= github.com/swaggo/http-swagger v0.0.0-20200308142732-58ac5e232fba/go.mod h1:O1lAbCgAAX/KZ80LM/OXwtWFI/5TvZlwxSg8Cq08PV0= @@ -644,8 +643,8 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/ go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= -go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec= +go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.8.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= @@ -793,8 +792,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -933,16 +932,16 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.1.3 h1:qTakTkI6ni6LFD5sBwwsdSO+AQqbSIxOauHTTQKZ/7o= -honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= +honnef.co/go/tools v0.2.0 h1:ws8AfbgTX3oIczLPNPCu5166oBg9ST2vNs0rcht+mDE= +honnef.co/go/tools v0.2.0/go.mod h1:lPVVZ2BS5TfnjLyizF7o7hv7j9/L+8cZY2hLyjP9cGY= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= modernc.org/mathutil v1.2.2 h1:+yFk8hBprV+4c0U9GjFtL+dV3N8hOJ8JCituQcMShFY= modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= diff --git a/pkg/backup/client.go b/pkg/backup/client.go index 8de9133bd..5fd144311 100644 --- a/pkg/backup/client.go +++ b/pkg/backup/client.go @@ -266,8 +266,18 @@ func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { } func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) { - ranges := ranger.FullIntRange(false) - kvRanges := distsql.TableRangesToKVRanges(tblID, ranges, nil) + var ranges []*ranger.Range + if tbl.IsCommonHandle { + ranges = ranger.FullNotNullRange() + } else { + ranges = ranger.FullIntRange(false) + } + + kvRanges, err := distsql.TableHandleRangesToKVRanges(nil, []int64{tblID}, tbl.IsCommonHandle, ranges, nil) + if err != nil { + return nil, errors.Trace(err) + } + for _, index := range tbl.Indices { if index.State != model.StatePublic { continue @@ -721,9 +731,8 @@ func (bc *Client) fineGrainedBackup( max.mu.Unlock() if ms != 0 { log.Info("handle fine grained", zap.Int("backoffMs", ms)) - // 2 means tikv.boTxnLockFast // TODO: fill a meaningful error. 
- err := bo.BackoffWithMaxSleep(2, ms, berrors.ErrUnknown) + err := bo.BackoffWithMaxSleepTxnLockFast(ms, berrors.ErrUnknown) if err != nil { return errors.Trace(err) } @@ -1028,7 +1037,7 @@ func ChecksumMatches(backupMeta *backuppb.BackupMeta, local []Checksum) error { zap.Uint64("origin tidb total bytes", schema.TotalBytes), zap.Uint64("calculated total bytes", localChecksum.TotalBytes)) // TODO enhance error - return errors.Annotate(berrors.ErrBackupChecksumMismatch, "failed in checksum, and cannot parse table info") + return berrors.ErrBackupChecksumMismatch } log.Info("checksum success", zap.String("database", dbInfo.Name.L), diff --git a/pkg/backup/client_test.go b/pkg/backup/client_test.go index aa29dc555..b07779aee 100644 --- a/pkg/backup/client_test.go +++ b/pkg/backup/client_test.go @@ -17,6 +17,7 @@ import ( "github.com/pingcap/tidb/store/tikv/mockstore/mocktikv" "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/tablecodec" + "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/codec" pd "github.com/tikv/pd/client" @@ -104,7 +105,7 @@ func (r *testBackup) TestGetTS(c *C) { c.Assert(ts, Equals, backupts) } -func (r *testBackup) TestBuildTableRange(c *C) { +func (r *testBackup) TestBuildTableRangeIntHandle(c *C) { type Case struct { ids []int64 trs []kv.KeyRange @@ -145,6 +146,50 @@ func (r *testBackup) TestBuildTableRange(c *C) { }) } +func (r *testBackup) TestBuildTableRangeCommonHandle(c *C) { + type Case struct { + ids []int64 + trs []kv.KeyRange + } + low, err_l := codec.EncodeKey(nil, nil, []types.Datum{types.MinNotNullDatum()}...) + c.Assert(err_l, IsNil) + high, err_h := codec.EncodeKey(nil, nil, []types.Datum{types.MaxValueDatum()}...) + c.Assert(err_h, IsNil) + high = kv.Key(high).PrefixNext() + cases := []Case{ + {ids: []int64{1}, trs: []kv.KeyRange{ + {StartKey: tablecodec.EncodeRowKey(1, low), EndKey: tablecodec.EncodeRowKey(1, high)}, + }}, + {ids: []int64{1, 2, 3}, trs: []kv.KeyRange{ + {StartKey: tablecodec.EncodeRowKey(1, low), EndKey: tablecodec.EncodeRowKey(1, high)}, + {StartKey: tablecodec.EncodeRowKey(2, low), EndKey: tablecodec.EncodeRowKey(2, high)}, + {StartKey: tablecodec.EncodeRowKey(3, low), EndKey: tablecodec.EncodeRowKey(3, high)}, + }}, + {ids: []int64{1, 3}, trs: []kv.KeyRange{ + {StartKey: tablecodec.EncodeRowKey(1, low), EndKey: tablecodec.EncodeRowKey(1, high)}, + {StartKey: tablecodec.EncodeRowKey(3, low), EndKey: tablecodec.EncodeRowKey(3, high)}, + }}, + } + for _, cs := range cases { + c.Log(cs) + tbl := &model.TableInfo{Partition: &model.PartitionInfo{Enable: true}, IsCommonHandle: true} + for _, id := range cs.ids { + tbl.Partition.Definitions = append(tbl.Partition.Definitions, + model.PartitionDefinition{ID: id}) + } + ranges, err := backup.BuildTableRanges(tbl) + c.Assert(err, IsNil) + c.Assert(ranges, DeepEquals, cs.trs) + } + + tbl := &model.TableInfo{ID: 7, IsCommonHandle: true} + ranges, err_r := backup.BuildTableRanges(tbl) + c.Assert(err_r, IsNil) + c.Assert(ranges, DeepEquals, []kv.KeyRange{ + {StartKey: tablecodec.EncodeRowKey(7, low), EndKey: tablecodec.EncodeRowKey(7, high)}, + }) +} + func (r *testBackup) TestOnBackupRegionErrorResponse(c *C) { type Case struct { storeID uint64 diff --git a/pkg/backup/push.go b/pkg/backup/push.go index e97a3fb4e..8d933d629 100644 --- a/pkg/backup/push.go +++ b/pkg/backup/push.go @@ -4,6 +4,7 @@ package backup import ( "context" + "fmt" "sync" "github.com/opentracing/opentracing-go" @@ -24,15 +25,28 @@ import ( // pushDown wraps a backup task. 
type pushDown struct { mgr ClientMgr - respCh chan *backuppb.BackupResponse + respCh chan responseAndStore errCh chan error } +type responseAndStore struct { + Resp *backuppb.BackupResponse + Store *metapb.Store +} + +func (r responseAndStore) GetResponse() *backuppb.BackupResponse { + return r.Resp +} + +func (r responseAndStore) GetStore() *metapb.Store { + return r.Store +} + // newPushDown creates a push down backup. func newPushDown(mgr ClientMgr, cap int) *pushDown { return &pushDown{ mgr: mgr, - respCh: make(chan *backuppb.BackupResponse, cap), + respCh: make(chan responseAndStore, cap), errCh: make(chan error, cap), } } @@ -59,6 +73,7 @@ func (push *pushDown) pushBackup( wg := new(sync.WaitGroup) for _, s := range stores { + store := s storeID := s.GetId() if s.GetState() != metapb.StoreState_Up { log.Warn("skip store", zap.Uint64("StoreID", storeID), zap.Stringer("State", s.GetState())) @@ -78,7 +93,10 @@ func (push *pushDown) pushBackup( ctx, storeID, client, req, func(resp *backuppb.BackupResponse) error { // Forward all responses (including error). - push.respCh <- resp + push.respCh <- responseAndStore{ + Resp: resp, + Store: store, + } return nil }, func() (backuppb.BackupClient, error) { @@ -101,7 +119,9 @@ func (push *pushDown) pushBackup( for { select { - case resp, ok := <-push.respCh: + case respAndStore, ok := <-push.respCh: + resp := respAndStore.GetResponse() + store := respAndStore.GetStore() if !ok { // Finished. return res, nil @@ -113,6 +133,13 @@ func (push *pushDown) pushBackup( Msg: msg, } }) + failpoint.Inject("tikv-rw-error", func(val failpoint.Value) { + msg := val.(string) + log.Debug("failpoint tikv-rw-error injected.", zap.String("msg", msg)) + resp.Error = &backuppb.Error{ + Msg: msg, + } + }) if resp.GetError() == nil { // None error means range has been backuped successfully. 
res.Put( @@ -137,8 +164,18 @@ func (push *pushDown) pushBackup( log.Warn("backup occur storage error", zap.String("error", errPb.GetMsg())) continue } - log.Error("backup occur unknown error", zap.String("error", errPb.GetMsg())) - return res, errors.Annotatef(berrors.ErrKVUnknown, "%v", errPb) + if utils.MessageIsNotFoundStorageError(errPb.GetMsg()) { + errMsg := fmt.Sprintf("File or directory not found error occurs on TiKV Node(store id: %v; Address: %s)", store.GetId(), redact.String(store.GetAddress())) + log.Error("", zap.String("error", berrors.ErrKVStorage.Error()+": "+errMsg), + zap.String("work around", "please ensure br and tikv node share a same disk and the user of br and tikv has same uid.")) + } + + if utils.MessageIsPermissionDeniedStorageError(errPb.GetMsg()) { + errMsg := fmt.Sprintf("I/O permission denied error occurs on TiKV Node(store id: %v; Address: %s)", store.GetId(), redact.String(store.GetAddress())) + log.Error("", zap.String("error", berrors.ErrKVStorage.Error()+": "+errMsg), + zap.String("work around", "please ensure tikv has permission to read from & write to the storage.")) + } + return res, berrors.ErrKVStorage } } case err := <-push.errCh: diff --git a/pkg/checksum/executor.go b/pkg/checksum/executor.go index 9cee17079..a4e9b17a4 100644 --- a/pkg/checksum/executor.go +++ b/pkg/checksum/executor.go @@ -12,7 +12,6 @@ import ( "github.com/pingcap/tidb/distsql" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/sessionctx/variable" - tikvstore "github.com/pingcap/tidb/store/tikv/kv" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tipb/go-tipb" @@ -112,7 +111,7 @@ func buildRequest( concurrency uint, ) ([]*kv.Request, error) { reqs := make([]*kv.Request, 0) - req, err := buildTableRequest(tableID, oldTable, oldTableID, startTS, concurrency) + req, err := buildTableRequest(tableInfo, tableID, oldTable, oldTableID, startTS, concurrency) if err != nil { return nil, errors.Trace(err) } @@ -152,6 +151,7 @@ func buildRequest( } func buildTableRequest( + tableInfo *model.TableInfo, tableID int64, oldTable *utils.Table, oldTableID int64, @@ -172,12 +172,17 @@ func buildTableRequest( Rule: rule, } - ranges := ranger.FullIntRange(false) + var ranges []*ranger.Range + if tableInfo.IsCommonHandle { + ranges = ranger.FullNotNullRange() + } else { + ranges = ranger.FullIntRange(false) + } var builder distsql.RequestBuilder // Use low priority to reducing impact to other requests. - builder.Request.Priority = tikvstore.PriorityLow - return builder.SetTableRanges(tableID, ranges, nil). + builder.Request.Priority = kv.PriorityLow + return builder.SetHandleRanges(nil, tableID, tableInfo.IsCommonHandle, ranges, nil). SetStartTS(startTS). SetChecksumRequest(checksum). SetConcurrency(int(concurrency)). @@ -209,7 +214,7 @@ func buildIndexRequest( var builder distsql.RequestBuilder // Use low priority to reducing impact to other requests. - builder.Request.Priority = tikvstore.PriorityLow + builder.Request.Priority = kv.PriorityLow return builder.SetIndexRanges(nil, tableID, indexInfo.ID, ranges). SetStartTS(startTS). SetChecksumRequest(checksum). 
diff --git a/pkg/checksum/executor_test.go b/pkg/checksum/executor_test.go index 444ffc5bd..262c26497 100644 --- a/pkg/checksum/executor_test.go +++ b/pkg/checksum/executor_test.go @@ -14,6 +14,7 @@ import ( "github.com/pingcap/tidb/util/testkit" "github.com/pingcap/tidb/util/testleak" + "github.com/pingcap/br/pkg/backup" "github.com/pingcap/br/pkg/checksum" "github.com/pingcap/br/pkg/mock" "github.com/pingcap/br/pkg/utils" @@ -108,4 +109,23 @@ func (s *testChecksumSuite) TestChecksum(c *C) { resp2, err = exe2.Execute(context.TODO(), s.mock.Storage.GetClient(), func() {}) c.Assert(err, IsNil) c.Assert(resp2, NotNil) + + // Test commonHandle ranges + + tk.MustExec("drop table if exists t3;") + tk.MustExec("create table t3 (a char(255), b int, primary key(a) CLUSTERED);") + tk.MustExec("insert into t3 values ('fffffffff', 1), ('010101010', 2), ('394393fj39efefe', 3);") + tableInfo3 := s.getTableInfo(c, "test", "t3") + exe3, err := checksum.NewExecutorBuilder(tableInfo3, math.MaxUint64).Build() + c.Assert(err, IsNil) + first := true + exe3.Each(func(req *kv.Request) error { + if first { + first = false + ranges, err := backup.BuildTableRanges(tableInfo3) + c.Assert(err, IsNil) + c.Assert(req.KeyRanges, DeepEquals, ranges[:1], Commentf("%v", req.KeyRanges)) + } + return nil + }) } diff --git a/pkg/conn/conn.go b/pkg/conn/conn.go index 0f5a075c3..e69ab263f 100644 --- a/pkg/conn/conn.go +++ b/pkg/conn/conn.go @@ -7,7 +7,6 @@ import ( "crypto/tls" "os" "sync" - "sync/atomic" "time" "github.com/opentracing/opentracing-go" @@ -218,12 +217,6 @@ func NewMgr( } liveStoreCount++ } - if liveStoreCount == 0 && - // Assume 3 replicas - len(stores) >= 3 && len(stores) > liveStoreCount+1 { - log.Error("tikv cluster not health", zap.Reflect("stores", stores)) - return nil, errors.Annotatef(berrors.ErrKVNotHealth, "%+v", stores) - } var dom *domain.Domain if needDomain { @@ -390,8 +383,7 @@ func (mgr *Mgr) Close() { if mgr.dom != nil { mgr.dom.Close() } - - atomic.StoreUint32(&tikv.ShuttingDown, 1) + tikv.StoreShuttingDown(1) mgr.storage.Close() } diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 1a096861b..148765115 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -17,10 +17,11 @@ func Is(err error, is *errors.Error) bool { // BR errors. 
var ( - ErrUnknown = errors.Normalize("internal error", errors.RFCCodeText("BR:Common:ErrUnknown")) - ErrInvalidArgument = errors.Normalize("invalid argument", errors.RFCCodeText("BR:Common:ErrInvalidArgument")) - ErrVersionMismatch = errors.Normalize("version mismatch", errors.RFCCodeText("BR:Common:ErrVersionMismatch")) - ErrFailedToConnect = errors.Normalize("failed to make gRPC channels", errors.RFCCodeText("BR:Common:ErrFailedToConnect")) + ErrUnknown = errors.Normalize("internal error", errors.RFCCodeText("BR:Common:ErrUnknown")) + ErrInvalidArgument = errors.Normalize("invalid argument", errors.RFCCodeText("BR:Common:ErrInvalidArgument")) + ErrUndefinedRestoreDbOrTable = errors.Normalize("undefined restore databases or tables", errors.RFCCodeText("BR:Common:ErrUndefinedDbOrTable")) + ErrVersionMismatch = errors.Normalize("version mismatch", errors.RFCCodeText("BR:Common:ErrVersionMismatch")) + ErrFailedToConnect = errors.Normalize("failed to make gRPC channels", errors.RFCCodeText("BR:Common:ErrFailedToConnect")) ErrPDUpdateFailed = errors.Normalize("failed to update PD", errors.RFCCodeText("BR:PD:ErrPDUpdateFailed")) ErrPDLeaderNotFound = errors.Normalize("PD leader not found", errors.RFCCodeText("BR:PD:ErrPDLeaderNotFound")) @@ -54,9 +55,9 @@ var ( ErrStorageInvalidConfig = errors.Normalize("invalid external storage config", errors.RFCCodeText("BR:ExternalStorage:ErrStorageInvalidConfig")) // Errors reported from TiKV. - ErrKVUnknown = errors.Normalize("unknown tikv error", errors.RFCCodeText("BR:KV:ErrKVUnknown")) + ErrKVStorage = errors.Normalize("tikv storage occur I/O error", errors.RFCCodeText("BR:KV:ErrKVStorage")) + ErrKVUnknown = errors.Normalize("unknown error occur on tikv", errors.RFCCodeText("BR:KV:ErrKVUnknown")) ErrKVClusterIDMismatch = errors.Normalize("tikv cluster ID mismatch", errors.RFCCodeText("BR:KV:ErrKVClusterIDMismatch")) - ErrKVNotHealth = errors.Normalize("tikv cluster not health", errors.RFCCodeText("BR:KV:ErrKVNotHealth")) ErrKVNotLeader = errors.Normalize("not leader", errors.RFCCodeText("BR:KV:ErrKVNotLeader")) ErrKVNotTiKV = errors.Normalize("storage is not tikv", errors.RFCCodeText("BR:KV:ErrNotTiKVStorage")) diff --git a/pkg/gluetikv/glue.go b/pkg/gluetikv/glue.go index 24c8d0f9a..1e6f5108d 100644 --- a/pkg/gluetikv/glue.go +++ b/pkg/gluetikv/glue.go @@ -54,7 +54,7 @@ func (Glue) StartProgress(ctx context.Context, cmdName string, total int64, redi // Record implements glue.Glue. func (Glue) Record(name string, val uint64) { - summary.CollectUint(name, val) + summary.CollectSuccessUnit(name, 1, val) } // GetVersion implements glue.Glue. diff --git a/pkg/kv/session.go b/pkg/kv/session.go index e31d649bc..93ad7293d 100644 --- a/pkg/kv/session.go +++ b/pkg/kv/session.go @@ -111,7 +111,6 @@ func (t *transaction) Len() int { type kvUnionStore struct { kvMemBuf - kv.UnionStore } func (s *kvUnionStore) GetMemBuffer() kv.MemBuffer { @@ -171,10 +170,6 @@ func (t *transaction) Delete(k kv.Key) error { return t.kvMemBuf.Delete(k) } -func (t *transaction) GetUnionStore() kv.UnionStore { - return &t.kvUnionStore -} - // GetTableInfo implements the kv.Transaction interface. 
func (t *transaction) GetTableInfo(id int64) *model.TableInfo { return nil diff --git a/pkg/lightning/backend/kv/session.go b/pkg/lightning/backend/kv/session.go index d90762b43..25fb7984d 100644 --- a/pkg/lightning/backend/kv/session.go +++ b/pkg/lightning/backend/kv/session.go @@ -94,7 +94,6 @@ func (t *transaction) Len() int { type kvUnionStore struct { kvMemBuf - kv.UnionStore } func (s *kvUnionStore) GetMemBuffer() kv.MemBuffer { @@ -149,10 +148,6 @@ func (t *transaction) Set(k kv.Key, v []byte) error { return t.kvMemBuf.Set(k, v) } -func (t *transaction) GetUnionStore() kv.UnionStore { - return &t.kvUnionStore -} - // GetTableInfo implements the kv.Transaction interface. func (t *transaction) GetTableInfo(id int64) *model.TableInfo { return nil @@ -252,3 +247,8 @@ func (se *session) Value(key fmt.Stringer) interface{} { // StmtAddDirtyTableOP implements the sessionctx.Context interface func (se *session) StmtAddDirtyTableOP(op int, physicalID int64, handle kv.Handle) {} + +// GetInfoSchema implements the sessionctx.Context interface. +func (se *session) GetInfoSchema() sessionctx.InfoschemaMetaVersion { + return nil +} diff --git a/pkg/lightning/backend/kv/sql2kv.go b/pkg/lightning/backend/kv/sql2kv.go index b3268101e..7e188bb90 100644 --- a/pkg/lightning/backend/kv/sql2kv.go +++ b/pkg/lightning/backend/kv/sql2kv.go @@ -408,6 +408,14 @@ func (kvcodec *tableKVEncoder) Encode( return kvPairs(pairs), nil } +func (kvs kvPairs) Size() uint64 { + size := uint64(0) + for _, kv := range kvs { + size += uint64(len(kv.Key) + len(kv.Val)) + } + return size +} + func (kvs kvPairs) ClassifyAndAppend( data *Rows, dataChecksum *verification.KVChecksum, diff --git a/pkg/lightning/backend/kv/types.go b/pkg/lightning/backend/kv/types.go index 299f4a8cb..4ebf65f90 100644 --- a/pkg/lightning/backend/kv/types.go +++ b/pkg/lightning/backend/kv/types.go @@ -35,6 +35,9 @@ type Row interface { indices *Rows, indexChecksum *verification.KVChecksum, ) + + // Size represents the total kv size of this Row. + Size() uint64 } // Rows represents a collection of encoded rows. diff --git a/pkg/lightning/backend/local/local.go b/pkg/lightning/backend/local/local.go index 1edd8c2db..2be3361d5 100644 --- a/pkg/lightning/backend/local/local.go +++ b/pkg/lightning/backend/local/local.go @@ -53,9 +53,12 @@ import ( "golang.org/x/sync/errgroup" "google.golang.org/grpc" "google.golang.org/grpc/backoff" + "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/keepalive" + "google.golang.org/grpc/status" + "github.com/pingcap/br/pkg/conn" "github.com/pingcap/br/pkg/lightning/backend" "github.com/pingcap/br/pkg/lightning/backend/kv" "github.com/pingcap/br/pkg/lightning/common" @@ -103,6 +106,8 @@ var ( localMaxTiKVVersion = version.NextMajorVersion() localMaxPDVersion = version.NextMajorVersion() tiFlashMinVersion = *semver.New("4.0.5") + + errorEngineClosed = errors.New("engine is closed") ) var ( @@ -146,6 +151,7 @@ type metaOrFlush struct { type File struct { localFileMeta + closed atomic.Bool db *pebble.DB UUID uuid.UUID localWriters sync.Map @@ -630,6 +636,9 @@ func (e *File) ingestSSTs(metas []*sstMeta) error { // use raw RLock to avoid change the lock state during flushing. 
e.mutex.RLock() defer e.mutex.RUnlock() + if e.closed.Load() { + return errorEngineClosed + } totalSize := int64(0) totalCount := int64(0) fileSize := int64(0) @@ -778,6 +787,7 @@ type local struct { engineMemCacheSize int localWriterMemCacheSize int64 + supportMultiIngest bool } // connPool is a lazy pool of gRPC channels. @@ -896,9 +906,41 @@ func NewLocalBackend( localWriterMemCacheSize: int64(cfg.LocalWriterMemCacheSize), } local.conns.conns = make(map[uint64]*connPool) + if err = local.checkMultiIngestSupport(ctx, pdCli); err != nil { + return backend.MakeBackend(nil), err + } + return backend.MakeBackend(local), nil } +func (local *local) checkMultiIngestSupport(ctx context.Context, pdClient pd.Client) error { + stores, err := conn.GetAllTiKVStores(ctx, pdClient, conn.SkipTiFlash) + if err != nil { + return errors.Trace(err) + } + for _, s := range stores { + client, err := local.getImportClient(ctx, s.Id) + if err != nil { + return errors.Trace(err) + } + _, err = client.MultiIngest(ctx, &sst.MultiIngestRequest{}) + if err != nil { + if st, ok := status.FromError(err); ok { + if st.Code() == codes.Unimplemented { + log.L().Info("multi ingest not support", zap.Any("unsupported store", s)) + local.supportMultiIngest = false + return nil + } + } + return errors.Trace(err) + } + } + + local.supportMultiIngest = true + log.L().Info("multi ingest support") + return nil +} + // rlock read locks a local file and returns the File instance if it exists. func (local *local) rLockEngine(engineId uuid.UUID) *File { if e, ok := local.engines.Load(engineId); ok { @@ -924,8 +966,13 @@ func (local *local) tryRLockAllEngines() []*File { var allEngines []*File local.engines.Range(func(k, v interface{}) bool { engine := v.(*File) + // skip closed engine if engine.tryRLock() { - allEngines = append(allEngines, engine) + if !engine.closed.Load() { + allEngines = append(allEngines, engine) + } else { + engine.rUnlock() + } } return true }) @@ -1021,6 +1068,9 @@ func (local *local) FlushEngine(ctx context.Context, engineID uuid.UUID) error { return errors.Errorf("engine '%s' not found", engineID) } defer engineFile.rUnlock() + if engineFile.closed.Load() { + return nil + } return engineFile.flushEngineWithoutLock(ctx) } @@ -1152,9 +1202,20 @@ func (local *local) CloseEngine(ctx context.Context, engineUUID uuid.UUID) error engineFile := engine.(*File) engineFile.rLock() + if engineFile.closed.Load() { + engineFile.rUnlock() + return nil + } + err := engineFile.flushEngineWithoutLock(ctx) engineFile.rUnlock() + + // use mutex to make sure we won't close sstMetasChan while other routines + // trying to do flush. 
+ engineFile.lock(importMutexStateClose) + engineFile.closed.Store(true) close(engineFile.sstMetasChan) + engineFile.unlock() if err != nil { return errors.Trace(err) } @@ -1162,11 +1223,11 @@ func (local *local) CloseEngine(ctx context.Context, engineUUID uuid.UUID) error return engineFile.ingestErr.Get() } -func (local *local) getImportClient(ctx context.Context, peer *metapb.Peer) (sst.ImportSSTClient, error) { +func (local *local) getImportClient(ctx context.Context, storeID uint64) (sst.ImportSSTClient, error) { local.conns.mu.Lock() defer local.conns.mu.Unlock() - conn, err := local.getGrpcConnLocked(ctx, peer.GetStoreId()) + conn, err := local.getGrpcConnLocked(ctx, storeID) if err != nil { return nil, err } @@ -1228,7 +1289,7 @@ func (local *local) WriteToTiKV( clients := make([]sst.ImportSST_WriteClient, 0, len(region.Region.GetPeers())) requests := make([]*sst.WriteRequest, 0, len(region.Region.GetPeers())) for _, peer := range region.Region.GetPeers() { - cli, err := local.getImportClient(ctx, peer) + cli, err := local.getImportClient(ctx, peer.StoreId) if err != nil { return nil, Range{}, stats, err } @@ -1312,16 +1373,21 @@ func (local *local) WriteToTiKV( var leaderPeerMetas []*sst.SSTMeta for i, wStream := range clients { - if resp, closeErr := wStream.CloseAndRecv(); closeErr != nil { + resp, closeErr := wStream.CloseAndRecv() + if closeErr != nil { return nil, Range{}, stats, errors.Trace(closeErr) - } else if leaderID == region.Region.Peers[i].GetId() { + } + if resp.Error != nil { + return nil, Range{}, stats, errors.New(resp.Error.Message) + } + if leaderID == region.Region.Peers[i].GetId() { leaderPeerMetas = resp.Metas log.L().Debug("get metas after write kv stream to tikv", zap.Reflect("metas", leaderPeerMetas)) } } // if there is not leader currently, we should directly return an error - if leaderPeerMetas == nil { + if len(leaderPeerMetas) == 0 { log.L().Warn("write to tikv no leader", logutil.Region(region.Region), logutil.Leader(region.Leader), zap.Uint64("leader_id", leaderID), logutil.SSTMeta(meta), zap.Int64("kv_pairs", totalCount), zap.Int64("total_bytes", size)) @@ -1350,13 +1416,13 @@ func (local *local) WriteToTiKV( return leaderPeerMetas, finishedRange, stats, nil } -func (local *local) Ingest(ctx context.Context, meta *sst.SSTMeta, region *split.RegionInfo) (*sst.IngestResponse, error) { +func (local *local) Ingest(ctx context.Context, metas []*sst.SSTMeta, region *split.RegionInfo) (*sst.IngestResponse, error) { leader := region.Leader if leader == nil { leader = region.Region.GetPeers()[0] } - cli, err := local.getImportClient(ctx, leader) + cli, err := local.getImportClient(ctx, leader.StoreId) if err != nil { return nil, err } @@ -1366,15 +1432,24 @@ func (local *local) Ingest(ctx context.Context, meta *sst.SSTMeta, region *split Peer: leader, } - req := &sst.IngestRequest{ - Context: reqCtx, - Sst: meta, + if !local.supportMultiIngest { + if len(metas) != 1 { + return nil, errors.New("batch ingest is not support") + } + req := &sst.IngestRequest{ + Context: reqCtx, + Sst: metas[0], + } + resp, err := cli.Ingest(ctx, req) + return resp, errors.Trace(err) } - resp, err := cli.Ingest(ctx, req) - if err != nil { - return nil, errors.Trace(err) + + req := &sst.MultiIngestRequest{ + Context: reqCtx, + Ssts: metas, } - return resp, nil + resp, err := cli.MultiIngest(ctx, req) + return resp, errors.Trace(err) } func splitRangeBySizeProps(fullRange Range, sizeProps *sizeProperties, sizeLimit int64, keysLimit int64) []Range { @@ -1671,10 +1746,22 @@ 
loopWrite: continue loopWrite } - for _, meta := range metas { + if len(metas) == 0 { + return nil + } + + batch := 1 + if local.supportMultiIngest { + batch = len(metas) + } + + for i := 0; i < len(metas); i += batch { + start := i * batch + end := utils.MinInt((i+1)*batch, len(metas)) + ingestMetas := metas[start:end] errCnt := 0 for errCnt < maxRetryTimes { - log.L().Debug("ingest meta", zap.Reflect("meta", meta)) + log.L().Debug("ingest meta", zap.Reflect("meta", ingestMetas)) var resp *sst.IngestResponse failpoint.Inject("FailIngestMeta", func(val failpoint.Value) { // only inject the error once @@ -1702,13 +1789,13 @@ loopWrite: } }) if resp == nil { - resp, err = local.Ingest(ctx, meta, region) + resp, err = local.Ingest(ctx, ingestMetas, region) } if err != nil { if common.IsContextCanceledError(err) { return err } - log.L().Warn("ingest failed", log.ShortError(err), logutil.SSTMeta(meta), + log.L().Warn("ingest failed", log.ShortError(err), logutil.SSTMetas(ingestMetas), logutil.Region(region.Region), logutil.Leader(region.Leader)) errCnt++ continue @@ -1716,7 +1803,7 @@ loopWrite: var retryTy retryType var newRegion *split.RegionInfo - retryTy, newRegion, err = local.isIngestRetryable(ctx, resp, region, meta) + retryTy, newRegion, err = local.isIngestRetryable(ctx, resp, region, ingestMetas) if common.IsContextCanceledError(err) { return err } @@ -1726,7 +1813,7 @@ loopWrite: } switch retryTy { case retryNone: - log.L().Warn("ingest failed noretry", log.ShortError(err), logutil.SSTMeta(meta), + log.L().Warn("ingest failed noretry", log.ShortError(err), logutil.SSTMetas(ingestMetas), logutil.Region(region.Region), logutil.Leader(region.Leader)) // met non-retryable error retry whole Write procedure return err @@ -2151,7 +2238,7 @@ func (local *local) isIngestRetryable( ctx context.Context, resp *sst.IngestResponse, region *split.RegionInfo, - meta *sst.SSTMeta, + metas []*sst.SSTMeta, ) (retryType, *split.RegionInfo, error) { if resp.GetError() == nil { return retryNone, nil, nil @@ -2200,7 +2287,7 @@ func (local *local) isIngestRetryable( if currentRegions := errPb.GetEpochNotMatch().GetCurrentRegions(); currentRegions != nil { var currentRegion *metapb.Region for _, r := range currentRegions { - if insideRegion(r, meta) { + if insideRegion(r, metas) { currentRegion = r break } @@ -2511,6 +2598,10 @@ func (w *Writer) AppendRows(ctx context.Context, tableName string, columnNames [ return nil } + if w.local.closed.Load() { + return errorEngineClosed + } + w.Lock() defer w.Unlock() diff --git a/pkg/lightning/backend/local/local_freebsd.go b/pkg/lightning/backend/local/local_freebsd.go new file mode 100644 index 000000000..c54f40a98 --- /dev/null +++ b/pkg/lightning/backend/local/local_freebsd.go @@ -0,0 +1,26 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// +build freebsd + +package local + +import ( + "go.uber.org/zap" +) + +type Rlim_t = int64 + +func zapRlim_t(key string, val Rlim_t) zap.Field { + return zap.Int64(key, val) +} diff --git a/pkg/lightning/backend/local/local_test.go b/pkg/lightning/backend/local/local_test.go index 24fa94a2b..50eb68b76 100644 --- a/pkg/lightning/backend/local/local_test.go +++ b/pkg/lightning/backend/local/local_test.go @@ -462,13 +462,21 @@ func (s *localSuite) TestIsIngestRetryable(c *C) { }, }, } - meta := &sst.SSTMeta{ - Range: &sst.Range{ - Start: []byte{1}, - End: []byte{2}, + metas := []*sst.SSTMeta{ + { + Range: &sst.Range{ + Start: []byte{1}, + End: []byte{2}, + }, + }, + { + Range: &sst.Range{ + Start: []byte{1, 1}, + End: []byte{2}, + }, }, } - retryType, newRegion, err := local.isIngestRetryable(ctx, resp, region, meta) + retryType, newRegion, err := local.isIngestRetryable(ctx, resp, region, metas) c.Assert(retryType, Equals, retryWrite) c.Assert(newRegion.Leader.Id, Equals, uint64(2)) c.Assert(err, NotNil) @@ -489,18 +497,18 @@ func (s *localSuite) TestIsIngestRetryable(c *C) { }, }, } - retryType, newRegion, err = local.isIngestRetryable(ctx, resp, region, meta) + retryType, newRegion, err = local.isIngestRetryable(ctx, resp, region, metas) c.Assert(retryType, Equals, retryWrite) c.Assert(newRegion.Region.RegionEpoch.Version, Equals, uint64(2)) c.Assert(err, NotNil) resp.Error = &errorpb.Error{Message: "raft: proposal dropped"} - retryType, _, err = local.isIngestRetryable(ctx, resp, region, meta) + retryType, _, err = local.isIngestRetryable(ctx, resp, region, metas) c.Assert(retryType, Equals, retryWrite) c.Assert(err, NotNil) resp.Error = &errorpb.Error{Message: "unknown error"} - retryType, _, err = local.isIngestRetryable(ctx, resp, region, meta) + retryType, _, err = local.isIngestRetryable(ctx, resp, region, metas) c.Assert(retryType, Equals, retryNone) c.Assert(err, ErrorMatches, "non-retryable error: unknown error") } diff --git a/pkg/lightning/backend/local/local_unix.go b/pkg/lightning/backend/local/local_unix.go index 8fda30b37..c0c4882cf 100644 --- a/pkg/lightning/backend/local/local_unix.go +++ b/pkg/lightning/backend/local/local_unix.go @@ -20,7 +20,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "go.uber.org/zap" "github.com/pingcap/br/pkg/lightning/log" ) @@ -30,7 +29,7 @@ const ( minRLimit = 1024 ) -func GetSystemRLimit() (uint64, error) { +func GetSystemRLimit() (Rlim_t, error) { var rLimit syscall.Rlimit err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) return rLimit.Cur, err @@ -39,14 +38,14 @@ func GetSystemRLimit() (uint64, error) { // VerifyRLimit checks whether the open-file limit is large enough. // In Local-backend, we need to read and write a lot of L0 SST files, so we need // to check system max open files limit. 
-func VerifyRLimit(estimateMaxFiles uint64) error { +func VerifyRLimit(estimateMaxFiles Rlim_t) error { if estimateMaxFiles < minRLimit { estimateMaxFiles = minRLimit } var rLimit syscall.Rlimit err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) failpoint.Inject("GetRlimitValue", func(v failpoint.Value) { - limit := uint64(v.(int)) + limit := Rlim_t(v.(int)) rLimit.Cur = limit rLimit.Max = limit err = nil @@ -87,6 +86,6 @@ func VerifyRLimit(estimateMaxFiles uint64) error { } log.L().Info("Set the maximum number of open file descriptors(rlimit)", - zap.Uint64("old", prevLimit), zap.Uint64("new", estimateMaxFiles)) + zapRlim_t("old", prevLimit), zapRlim_t("new", estimateMaxFiles)) return nil } diff --git a/pkg/lightning/backend/local/local_unix_generic.go b/pkg/lightning/backend/local/local_unix_generic.go new file mode 100644 index 000000000..362b4c1f1 --- /dev/null +++ b/pkg/lightning/backend/local/local_unix_generic.go @@ -0,0 +1,24 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !freebsd,!windows + +package local + +import "go.uber.org/zap" + +type Rlim_t = uint64 + +func zapRlim_t(key string, val Rlim_t) zap.Field { + return zap.Uint64(key, val) +} diff --git a/pkg/lightning/backend/local/local_windows.go b/pkg/lightning/backend/local/local_windows.go index d746ff4a6..a6e34aad8 100644 --- a/pkg/lightning/backend/local/local_windows.go +++ b/pkg/lightning/backend/local/local_windows.go @@ -21,6 +21,8 @@ import ( "github.com/pingcap/errors" ) +type Rlim_t = uint64 + // return a big value as unlimited, since rlimit verify is skipped in windows. func GetSystemRLimit() (uint64, error) { return math.MaxInt32, nil diff --git a/pkg/lightning/backend/local/localhelper.go b/pkg/lightning/backend/local/localhelper.go index 17b1a728f..2ae206d93 100644 --- a/pkg/lightning/backend/local/localhelper.go +++ b/pkg/lightning/backend/local/localhelper.go @@ -180,7 +180,7 @@ func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges [] } return err1 } else if common.IsContextCanceledError(err1) { - // do not retry on conext.Canceled error + // do not retry on context.Canceled error return err1 } log.L().Warn("split regions", log.ShortError(err1), zap.Int("retry time", i), @@ -189,9 +189,7 @@ func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges [] syncLock.Lock() retryKeys = append(retryKeys, keys[startIdx:]...) 
// set global error so if we exceed retry limit, the function will return this error - if !common.IsContextCanceledError(err1) { - err = multierr.Append(err, err1) - } + err = multierr.Append(err, err1) syncLock.Unlock() break } else { @@ -236,7 +234,9 @@ func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges [] } close(ch) if splitError := eg.Wait(); splitError != nil { - return splitError + retryKeys = retryKeys[:0] + err = splitError + continue } if len(retryKeys) == 0 { @@ -305,6 +305,8 @@ func paginateScanRegion( sort.Slice(regions, func(i, j int) bool { return bytes.Compare(regions[i].Region.StartKey, regions[j].Region.StartKey) < 0 }) + log.L().Info("paginate scan regions", zap.Int("count", len(regions)), + logutil.Key("start", startKey), logutil.Key("end", endKey)) return regions, nil } @@ -477,9 +479,13 @@ func beforeEnd(key []byte, end []byte) bool { return bytes.Compare(key, end) < 0 || len(end) == 0 } -func insideRegion(region *metapb.Region, meta *sst.SSTMeta) bool { - rg := meta.GetRange() - return keyInsideRegion(region, rg.GetStart()) && keyInsideRegion(region, rg.GetEnd()) +func insideRegion(region *metapb.Region, metas []*sst.SSTMeta) bool { + inside := true + for _, meta := range metas { + rg := meta.GetRange() + inside = inside && (keyInsideRegion(region, rg.GetStart()) && keyInsideRegion(region, rg.GetEnd())) + } + return inside } func keyInsideRegion(region *metapb.Region, key []byte) bool { diff --git a/pkg/lightning/backend/local/localhelper_test.go b/pkg/lightning/backend/local/localhelper_test.go index 7c2c69778..073bfe99c 100644 --- a/pkg/lightning/backend/local/localhelper_test.go +++ b/pkg/lightning/backend/local/localhelper_test.go @@ -20,7 +20,6 @@ import ( "math/rand" "sort" "sync" - "sync/atomic" "time" . "github.com/pingcap/check" @@ -34,6 +33,7 @@ import ( "github.com/pingcap/tidb/util/codec" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/placement" + "go.uber.org/atomic" "github.com/pingcap/br/pkg/restore" ) @@ -44,7 +44,7 @@ type testClient struct { regions map[uint64]*restore.RegionInfo regionsInfo *core.RegionsInfo // For now it's only used in ScanRegions nextRegionID uint64 - splitCount int + splitCount atomic.Int32 hook clientHook } @@ -148,6 +148,10 @@ func (c *testClient) SplitRegion( func (c *testClient) BatchSplitRegionsWithOrigin( ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte, ) (*restore.RegionInfo, []*restore.RegionInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + c.splitCount.Inc() + if c.hook != nil { regionInfo, keys = c.hook.BeforeSplitRegion(ctx, regionInfo, keys) } @@ -161,9 +165,6 @@ func (c *testClient) BatchSplitRegionsWithOrigin( default: } - c.splitCount++ - c.mu.Lock() - defer c.mu.Unlock() newRegions := make([]*restore.RegionInfo, 0) target, ok := c.regions[regionInfo.Region.Id] if !ok { @@ -387,7 +388,7 @@ func (d defaultHook) check(c *C, cli *testClient) { // 7. region: [bv, cca), keys: [bw, bx, by, bz] // since it may encounter error retries, here only check the lower threshold. - c.Assert(cli.splitCount >= 7, IsTrue) + c.Assert(cli.splitCount.Load() >= 7, IsTrue) } func (s *localSuite) doTestBatchSplitRegionByRanges(ctx context.Context, c *C, hook clientHook, errPat string, splitHook batchSplitHook) { @@ -473,7 +474,7 @@ func (h batchSizeHook) check(c *C, cli *testClient) { // 10. region: [bv, cca), keys: [bx, by, bz] // since it may encounter error retries, here only check the lower threshold. 
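Because BatchSplitRegionsWithOrigin is now exercised from several goroutines, the test client's splitCount moves from a plain int to go.uber.org/atomic's Int32, and the assertion below reads it with Load(). A tiny, self-contained sketch of that counter pattern:

package main

import (
	"fmt"
	"sync"

	"go.uber.org/atomic"
)

// Stand-in for the test client: the counter must be incremented and read
// without a data race when split calls run concurrently.
type testClient struct {
	splitCount atomic.Int32
}

func main() {
	c := &testClient{}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.splitCount.Inc()
		}()
	}
	wg.Wait()
	fmt.Println(c.splitCount.Load()) // 8
}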
- c.Assert(cli.splitCount, Equals, 9) + c.Assert(cli.splitCount.Load(), Equals, int32(9)) } func (s *localSuite) TestBatchSplitRegionByRangesKeySizeLimit(c *C) { @@ -517,13 +518,12 @@ func (s *localSuite) TestBatchSplitByRangesEpochNotMatch(c *C) { // return epoch not match error in every other call type splitRegionEpochNotMatchHookRandom struct { noopHook - cnt int32 + cnt atomic.Int32 } func (h *splitRegionEpochNotMatchHookRandom) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { regionInfo, keys = h.noopHook.BeforeSplitRegion(ctx, regionInfo, keys) - cnt := atomic.AddInt32(&h.cnt, 1) - if cnt%2 != 0 { + if h.cnt.Inc() != 0 { return regionInfo, keys } regionInfo = cloneRegion(regionInfo) @@ -539,12 +539,12 @@ func (s *localSuite) TestBatchSplitByRangesEpochNotMatchOnce(c *C) { type splitRegionNoValidKeyHook struct { noopHook returnErrTimes int32 - errorCnt int32 + errorCnt atomic.Int32 } -func (h splitRegionNoValidKeyHook) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { +func (h *splitRegionNoValidKeyHook) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { regionInfo, keys = h.noopHook.BeforeSplitRegion(ctx, regionInfo, keys) - if atomic.AddInt32(&h.errorCnt, 1) <= h.returnErrTimes { + if h.errorCnt.Inc() <= h.returnErrTimes { // clean keys to trigger "no valid keys" error keys = keys[:0] } @@ -552,7 +552,7 @@ func (h splitRegionNoValidKeyHook) BeforeSplitRegion(ctx context.Context, region } func (s *localSuite) TestBatchSplitByRangesNoValidKeysOnce(c *C) { - s.doTestBatchSplitRegionByRanges(context.Background(), c, &splitRegionNoValidKeyHook{returnErrTimes: 1}, ".*no valid key.*", defaultHook{}) + s.doTestBatchSplitRegionByRanges(context.Background(), c, &splitRegionNoValidKeyHook{returnErrTimes: 1}, "", defaultHook{}) } func (s *localSuite) TestBatchSplitByRangesNoValidKeys(c *C) { diff --git a/pkg/lightning/backend/noop/noop.go b/pkg/lightning/backend/noop/noop.go index 1997de40e..42c40cada 100644 --- a/pkg/lightning/backend/noop/noop.go +++ b/pkg/lightning/backend/noop/noop.go @@ -154,6 +154,10 @@ func (e noopEncoder) Encode(log.Logger, []types.Datum, int64, []int) (kv.Row, er type noopRow struct{} +func (r noopRow) Size() uint64 { + return 0 +} + func (r noopRow) ClassifyAndAppend(*kv.Rows, *verification.KVChecksum, *kv.Rows, *verification.KVChecksum) { } diff --git a/pkg/lightning/backend/tidb/tidb.go b/pkg/lightning/backend/tidb/tidb.go index 146008665..317a5e6b7 100644 --- a/pkg/lightning/backend/tidb/tidb.go +++ b/pkg/lightning/backend/tidb/tidb.go @@ -94,6 +94,10 @@ func NewTiDBBackend(db *sql.DB, onDuplicate string) backend.Backend { return backend.MakeBackend(&tidbBackend{db: db, onDuplicate: onDuplicate}) } +func (row tidbRow) Size() uint64 { + return uint64(len(row)) +} + func (row tidbRow) ClassifyAndAppend(data *kv.Rows, checksum *verification.KVChecksum, _ *kv.Rows, _ *verification.KVChecksum) { rows := (*data).(tidbRows) // Cannot do `rows := data.(*tidbRows); *rows = append(*rows, row)`. 
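Both backends now implement a Size method on their row types (always 0 for the no-op backend, the encoded SQL length for the TiDB backend), which suggests the kv.Row contract gained a Size() uint64 method for byte accounting. A trimmed-down sketch of that contract, with simplified stand-in types rather than the real kv package:

package main

import "fmt"

// Hypothetical, reduced view of the row contract the patch extends with Size():
// each backend-specific row reports how many bytes it will contribute.
type row interface {
	Size() uint64
}

// tidbRow is an SQL values tuple encoded as a string, so its size is its length.
type tidbRow string

func (r tidbRow) Size() uint64 { return uint64(len(r)) }

// noopRow is the no-op backend's placeholder; it carries no data.
type noopRow struct{}

func (noopRow) Size() uint64 { return 0 }

func main() {
	rows := []row{tidbRow("(1,'a'),(2,'b')"), noopRow{}}
	var total uint64
	for _, r := range rows {
		total += r.Size()
	}
	fmt.Println(total) // 15 bytes, all from the tidbRow
}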
@@ -500,48 +504,23 @@ func (be *tidbBackend) FetchRemoteTableModels(ctx context.Context, schemaName st if rows.Err() != nil { return rows.Err() } - // for version < v4.0.0 we can use `show table next_row_id` to fetch auto id info, so about should be enough + // shard_row_id/auto random is only available after tidb v4.0.0 + // `show table next_row_id` is also not available before tidb v4.0.0 if tidbVersion.Major < 4 { return nil } + // init auto id column for each table for _, tbl := range tables { tblName := common.UniqueTable(schemaName, tbl.Name.O) - rows, e = tx.Query(fmt.Sprintf("SHOW TABLE %s NEXT_ROW_ID", tblName)) - if e != nil { - return e + autoIDInfos, err := FetchTableAutoIDInfos(ctx, tx, tblName) + if err != nil { + return errors.Trace(err) } - for rows.Next() { - var ( - dbName, tblName, columnName, idType string - nextID int64 - ) - columns, err := rows.Columns() - if err != nil { - return err - } - - // +--------------+------------+-------------+--------------------+----------------+ - // | DB_NAME | TABLE_NAME | COLUMN_NAME | NEXT_GLOBAL_ROW_ID | ID_TYPE | - // +--------------+------------+-------------+--------------------+----------------+ - // | testsysbench | t | _tidb_rowid | 1 | AUTO_INCREMENT | - // +--------------+------------+-------------+--------------------+----------------+ - - // if columns length is 4, it doesn't contains the last column `ID_TYPE`, and it will always be 'AUTO_INCREMENT' - // for v4.0.0~v4.0.2 show table t next_row_id only returns 4 columns. - if len(columns) == 4 { - err = rows.Scan(&dbName, &tblName, &columnName, &nextID) - idType = "AUTO_INCREMENT" - } else { - err = rows.Scan(&dbName, &tblName, &columnName, &nextID, &idType) - } - if err != nil { - return err - } - + for _, info := range autoIDInfos { for _, col := range tbl.Columns { - if col.Name.O == columnName { - switch idType { + if col.Name.O == info.Column { + switch info.Type { case "AUTO_INCREMENT": col.Flag |= mysql.AutoIncrementFlag case "AUTO_RANDOM": @@ -553,14 +532,7 @@ func (be *tidbBackend) FetchRemoteTableModels(ctx context.Context, schemaName st } } } - // Defer in for-loop would be costly, anyway, we don't need those rows after this turn of iteration. 
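FetchRemoteTableModels now delegates the NEXT_ROW_ID parsing to the reusable FetchTableAutoIDInfos helper added further down in this file's diff. A hedged usage sketch against a local TiDB; the DSN and table name are placeholders, not part of the patch:

package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"

	_ "github.com/go-sql-driver/mysql"

	"github.com/pingcap/br/pkg/lightning/backend/tidb"
)

func main() {
	// Placeholder DSN pointing at a TiDB instance.
	db, err := sql.Open("mysql", "root@tcp(127.0.0.1:4000)/test")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// *sql.DB satisfies the new common.QueryExecutor interface.
	infos, err := tidb.FetchTableAutoIDInfos(context.Background(), db, "`test`.`t`")
	if err != nil {
		log.Fatal(err)
	}
	for _, info := range infos {
		// Column is _tidb_rowid or the auto column; Type is e.g. AUTO_INCREMENT or AUTO_RANDOM.
		fmt.Println(info.Column, info.NextID, info.Type)
	}
}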
- //nolint:sqlclosecheck - if err := rows.Close(); err != nil { - return errors.Trace(err) - } - if rows.Err() != nil { - return errors.Trace(rows.Err()) - } + } return nil }) @@ -607,3 +579,59 @@ func (w *Writer) AppendRows(ctx context.Context, tableName string, columnNames [ func (w *Writer) IsSynced() bool { return true } + +type TableAutoIDInfo struct { + Column string + NextID int64 + Type string +} + +func FetchTableAutoIDInfos(ctx context.Context, exec common.QueryExecutor, tableName string) ([]*TableAutoIDInfo, error) { + rows, e := exec.QueryContext(ctx, fmt.Sprintf("SHOW TABLE %s NEXT_ROW_ID", tableName)) + if e != nil { + return nil, errors.Trace(e) + } + var autoIDInfos []*TableAutoIDInfo + for rows.Next() { + var ( + dbName, tblName, columnName, idType string + nextID int64 + ) + columns, err := rows.Columns() + if err != nil { + return nil, errors.Trace(err) + } + + //+--------------+------------+-------------+--------------------+----------------+ + //| DB_NAME | TABLE_NAME | COLUMN_NAME | NEXT_GLOBAL_ROW_ID | ID_TYPE | + //+--------------+------------+-------------+--------------------+----------------+ + //| testsysbench | t | _tidb_rowid | 1 | AUTO_INCREMENT | + //+--------------+------------+-------------+--------------------+----------------+ + + // if columns length is 4, it doesn't contains the last column `ID_TYPE`, and it will always be 'AUTO_INCREMENT' + // for v4.0.0~v4.0.2 show table t next_row_id only returns 4 columns. + if len(columns) == 4 { + err = rows.Scan(&dbName, &tblName, &columnName, &nextID) + idType = "AUTO_INCREMENT" + } else { + err = rows.Scan(&dbName, &tblName, &columnName, &nextID, &idType) + } + if err != nil { + return nil, errors.Trace(err) + } + autoIDInfos = append(autoIDInfos, &TableAutoIDInfo{ + Column: columnName, + NextID: nextID, + Type: idType, + }) + } + // Defer in for-loop would be costly, anyway, we don't need those rows after this turn of iteration. + //nolint:sqlclosecheck + if err := rows.Close(); err != nil { + return nil, errors.Trace(err) + } + if rows.Err() != nil { + return nil, errors.Trace(rows.Err()) + } + return autoIDInfos, nil +} diff --git a/pkg/lightning/checkpoints/checkpoints.go b/pkg/lightning/checkpoints/checkpoints.go index 70c9053bb..d412553e5 100644 --- a/pkg/lightning/checkpoints/checkpoints.go +++ b/pkg/lightning/checkpoints/checkpoints.go @@ -23,7 +23,6 @@ import ( "math" "os" "sort" - "strings" "sync" "github.com/joho/sqltocsv" @@ -63,7 +62,7 @@ const ( // the table names to store each kind of checkpoint in the checkpoint database // remember to increase the version number in case of incompatible change. CheckpointTableNameTask = "task_v2" - CheckpointTableNameTable = "table_v6" + CheckpointTableNameTable = "table_v7" CheckpointTableNameEngine = "engine_v5" CheckpointTableNameChunk = "chunk_v5" @@ -99,6 +98,9 @@ const ( table_id bigint NOT NULL DEFAULT 0, create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + kv_bytes bigint unsigned NOT NULL DEFAULT 0, + kv_kvs bigint unsigned NOT NULL DEFAULT 0, + kv_checksum bigint unsigned NOT NULL DEFAULT 0, INDEX(task_id) );` CreateEngineTableTemplate = ` @@ -154,7 +156,7 @@ const ( FROM %s.%s WHERE table_name = ? 
ORDER BY engine_id, path, offset;` ReadTableRemainTemplate = ` - SELECT status, alloc_base, table_id FROM %s.%s WHERE table_name = ?;` + SELECT status, alloc_base, table_id, kv_bytes, kv_kvs, kv_checksum FROM %s.%s WHERE table_name = ?;` ReplaceEngineTemplate = ` REPLACE INTO %s.%s (table_name, engine_id, status) VALUES (?, ?, ?);` ReplaceChunkTemplate = ` @@ -176,7 +178,8 @@ const ( UPDATE %s.%s SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?;` UpdateTableStatusTemplate = ` UPDATE %s.%s SET status = ? WHERE table_name = ?;` - UpdateEngineTemplate = ` + UpdateTableChecksumTemplate = `UPDATE %s.%s SET kv_bytes = ?, kv_kvs = ?, kv_checksum = ? WHERE table_name = ?;` + UpdateEngineTemplate = ` UPDATE %s.%s SET status = ? WHERE (table_name, engine_id) = (?, ?);` DeleteCheckpointRecordTemplate = "DELETE FROM %s.%s WHERE table_name = ?;" ) @@ -278,6 +281,8 @@ type TableCheckpoint struct { AllocBase int64 Engines map[int32]*EngineCheckpoint TableID int64 + // remote checksum before restore + Checksum verify.KVChecksum } func (cp *TableCheckpoint) DeepCopy() *TableCheckpoint { @@ -290,6 +295,7 @@ func (cp *TableCheckpoint) DeepCopy() *TableCheckpoint { AllocBase: cp.AllocBase, Engines: engines, TableID: cp.TableID, + Checksum: cp.Checksum, } } @@ -315,11 +321,13 @@ type engineCheckpointDiff struct { } type TableCheckpointDiff struct { - hasStatus bool - hasRebase bool - status CheckpointStatus - allocBase int64 - engines map[int32]engineCheckpointDiff + hasStatus bool + hasRebase bool + hasChecksum bool + status CheckpointStatus + allocBase int64 + engines map[int32]engineCheckpointDiff + checksum verify.KVChecksum } func NewTableCheckpointDiff() *TableCheckpointDiff { @@ -438,6 +446,15 @@ func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { }) } +type TableChecksumMerger struct { + Checksum verify.KVChecksum +} + +func (m *TableChecksumMerger) MergeInto(cpd *TableCheckpointDiff) { + cpd.hasChecksum = true + cpd.checksum = m.Checksum +} + type RebaseCheckpointMerger struct { AllocBase int64 } @@ -591,10 +608,7 @@ type MySQLCheckpointsDB struct { } func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) (*MySQLCheckpointsDB, error) { - var escapedSchemaName strings.Builder - common.WriteMySQLIdentifier(&escapedSchemaName, schemaName) - schema := escapedSchemaName.String() - + schema := common.EscapeIdentifier(schemaName) sql := common.SQLWithRetry{ DB: db, Logger: log.With(zap.String("schema", schemaName)), @@ -780,12 +794,13 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab tableRow := tx.QueryRowContext(c, tableQuery, tableName) var status uint8 - if err := tableRow.Scan(&status, &cp.AllocBase, &cp.TableID); err != nil { + var kvs, bytes, checksum uint64 + if err := tableRow.Scan(&status, &cp.AllocBase, &cp.TableID, &bytes, &kvs, &checksum); err != nil { if err == sql.ErrNoRows { return errors.NotFoundf("checkpoint for table %s", tableName) } - return errors.Trace(err) } + cp.Checksum = verify.MakeKVChecksum(bytes, kvs, checksum) cp.Status = CheckpointStatus(status) return nil }) @@ -849,6 +864,7 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi chunkQuery := fmt.Sprintf(UpdateChunkTemplate, cpdb.schema, CheckpointTableNameChunk) rebaseQuery := fmt.Sprintf(UpdateTableRebaseTemplate, cpdb.schema, CheckpointTableNameTable) tableStatusQuery := fmt.Sprintf(UpdateTableStatusTemplate, cpdb.schema, CheckpointTableNameTable) + tableChecksumQuery := 
fmt.Sprintf(UpdateTableChecksumTemplate, cpdb.schema, CheckpointTableNameTable) engineStatusQuery := fmt.Sprintf(UpdateEngineTemplate, cpdb.schema, CheckpointTableNameEngine) s := common.SQLWithRetry{DB: cpdb.db, Logger: log.L()} @@ -868,12 +884,16 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } defer tableStatusStmt.Close() + tableChecksumStmt, e := tx.PrepareContext(c, tableChecksumQuery) + if e != nil { + return errors.Trace(e) + } + defer tableChecksumStmt.Close() engineStatusStmt, e := tx.PrepareContext(c, engineStatusQuery) if e != nil { return errors.Trace(e) } defer engineStatusStmt.Close() - for tableName, cpd := range checkpointDiffs { if cpd.hasStatus { if _, e := tableStatusStmt.ExecContext(c, cpd.status, tableName); e != nil { @@ -885,6 +905,11 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } } + if cpd.hasChecksum { + if _, e := tableChecksumStmt.ExecContext(c, cpd.checksum.SumSize(), cpd.checksum.SumKVS(), cpd.checksum.Sum(), tableName); e != nil { + return errors.Trace(e) + } + } for engineID, engineDiff := range cpd.engines { if engineDiff.hasStatus { if _, e := engineStatusStmt.ExecContext(c, engineDiff.status, tableName, engineID); e != nil { @@ -1054,6 +1079,7 @@ func (cpdb *FileCheckpointsDB) Get(_ context.Context, tableName string) (*TableC AllocBase: tableModel.AllocBase, Engines: make(map[int32]*EngineCheckpoint, len(tableModel.Engines)), TableID: tableModel.TableID, + Checksum: verify.MakeKVChecksum(tableModel.KvBytes, tableModel.KvKvs, tableModel.KvChecksum), } for engineID, engineModel := range tableModel.Engines { @@ -1152,6 +1178,11 @@ func (cpdb *FileCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoin if cpd.hasRebase { tableModel.AllocBase = cpd.allocBase } + if cpd.hasChecksum { + tableModel.KvBytes = cpd.checksum.SumSize() + tableModel.KvKvs = cpd.checksum.SumKVS() + tableModel.KvChecksum = cpd.checksum.Sum() + } for engineID, engineDiff := range cpd.engines { engineModel := tableModel.Engines[engineID] if engineDiff.hasStatus { diff --git a/pkg/lightning/checkpoints/checkpoints_file_test.go b/pkg/lightning/checkpoints/checkpoints_file_test.go index 32dfc3647..a3df9c35b 100644 --- a/pkg/lightning/checkpoints/checkpoints_file_test.go +++ b/pkg/lightning/checkpoints/checkpoints_file_test.go @@ -117,6 +117,10 @@ func (s *cpFileSuite) SetUpTest(c *C) { AllocBase: 132861, } rcm.MergeInto(cpd) + cksum := checkpoints.TableChecksumMerger{ + Checksum: verification.MakeKVChecksum(4492, 686, 486070148910), + } + cksum.MergeInto(cpd) ccm := checkpoints.ChunkCheckpointMerger{ EngineID: 0, Key: checkpoints.ChunkCheckpointKey{Path: "/tmp/path/1.sql", Offset: 0}, @@ -158,6 +162,7 @@ func (s *cpFileSuite) TestGet(c *C) { c.Assert(cp, DeepEquals, &checkpoints.TableCheckpoint{ Status: checkpoints.CheckpointStatusAllWritten, AllocBase: 132861, + Checksum: verification.MakeKVChecksum(4492, 686, 486070148910), Engines: map[int32]*checkpoints.EngineCheckpoint{ -1: { Status: checkpoints.CheckpointStatusLoaded, diff --git a/pkg/lightning/checkpoints/checkpoints_sql_test.go b/pkg/lightning/checkpoints/checkpoints_sql_test.go index a9d18aa0b..146d1dffa 100644 --- a/pkg/lightning/checkpoints/checkpoints_sql_test.go +++ b/pkg/lightning/checkpoints/checkpoints_sql_test.go @@ -175,6 +175,10 @@ func (s *cpSQLSuite) TestNormalOperations(c *C) { AllocBase: 132861, } rcm.MergeInto(cpd) + cksum := checkpoints.TableChecksumMerger{ + Checksum: 
verification.MakeKVChecksum(4492, 686, 486070148910), + } + cksum.MergeInto(cpd) ccm := checkpoints.ChunkCheckpointMerger{ EngineID: 0, Key: checkpoints.ChunkCheckpointKey{Path: "/tmp/path/1.sql", Offset: 0}, @@ -208,6 +212,12 @@ func (s *cpSQLSuite) TestNormalOperations(c *C) { ExpectExec(). WithArgs(60, "`db1`.`t2`"). WillReturnResult(sqlmock.NewResult(14, 1)) + s.mock. + ExpectPrepare("UPDATE `mock-schema`\\.table_v\\d+ SET kv_bytes = .+"). + ExpectExec(). + WithArgs(4492, 686, 486070148910, "`db1`.`t2`"). + WillReturnResult(sqlmock.NewResult(15, 1)) + s.mock.ExpectCommit() s.mock.MatchExpectationsInOrder(false) @@ -245,8 +255,8 @@ func (s *cpSQLSuite) TestNormalOperations(c *C) { ExpectQuery("SELECT .+ FROM `mock-schema`\\.table_v\\d+"). WithArgs("`db1`.`t2`"). WillReturnRows( - sqlmock.NewRows([]string{"status", "alloc_base", "table_id"}). - AddRow(60, 132861, int64(2)), + sqlmock.NewRows([]string{"status", "alloc_base", "table_id", "kv_bytes", "kv_kvs", "kv_checksum"}). + AddRow(60, 132861, int64(2), uint64(4492), uint64(686), uint64(486070148910)), ) s.mock.ExpectCommit() @@ -282,6 +292,7 @@ func (s *cpSQLSuite) TestNormalOperations(c *C) { }}, }, }, + Checksum: verification.MakeKVChecksum(4492, 686, 486070148910), }) c.Assert(s.mock.ExpectationsWereMet(), IsNil) } diff --git a/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.pb.go b/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.pb.go index 10d9d5539..523a01fd2 100644 --- a/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.pb.go +++ b/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.pb.go @@ -109,11 +109,14 @@ func (m *TaskCheckpointModel) XXX_DiscardUnknown() { var xxx_messageInfo_TaskCheckpointModel proto.InternalMessageInfo type TableCheckpointModel struct { - Hash []byte `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"` - Status uint32 `protobuf:"varint,3,opt,name=status,proto3" json:"status,omitempty"` - AllocBase int64 `protobuf:"varint,4,opt,name=alloc_base,json=allocBase,proto3" json:"alloc_base,omitempty"` - Engines map[int32]*EngineCheckpointModel `protobuf:"bytes,8,rep,name=engines,proto3" json:"engines,omitempty" protobuf_key:"zigzag32,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - TableID int64 `protobuf:"varint,9,opt,name=tableID,proto3" json:"tableID,omitempty"` + Hash []byte `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"` + Status uint32 `protobuf:"varint,3,opt,name=status,proto3" json:"status,omitempty"` + AllocBase int64 `protobuf:"varint,4,opt,name=alloc_base,json=allocBase,proto3" json:"alloc_base,omitempty"` + Engines map[int32]*EngineCheckpointModel `protobuf:"bytes,8,rep,name=engines,proto3" json:"engines,omitempty" protobuf_key:"zigzag32,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + TableID int64 `protobuf:"varint,9,opt,name=tableID,proto3" json:"tableID,omitempty"` + KvBytes uint64 `protobuf:"varint,10,opt,name=kv_bytes,json=kvBytes,proto3" json:"kv_bytes,omitempty"` + KvKvs uint64 `protobuf:"varint,11,opt,name=kv_kvs,json=kvKvs,proto3" json:"kv_kvs,omitempty"` + KvChecksum uint64 `protobuf:"fixed64,12,opt,name=kv_checksum,json=kvChecksum,proto3" json:"kv_checksum,omitempty"` } func (m *TableCheckpointModel) Reset() { *m = TableCheckpointModel{} } @@ -255,59 +258,62 @@ func init() { } var fileDescriptor_c57c7b77a714394c = []byte{ - // 829 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0x41, 0x6f, 0x1b, 0x45, - 0x14, 0xce, 0x7a, 
0x63, 0xc7, 0x1e, 0xdb, 0xa9, 0x33, 0xa4, 0x65, 0x08, 0x60, 0x19, 0x97, 0x83, - 0xa5, 0x52, 0x5b, 0x2a, 0x17, 0x54, 0x01, 0x12, 0x49, 0x2a, 0x51, 0x45, 0x15, 0xd1, 0x50, 0x38, - 0x70, 0x59, 0xcd, 0xee, 0x4c, 0xec, 0xd5, 0x7a, 0x77, 0x56, 0x33, 0xb3, 0x4b, 0xdd, 0xff, 0x80, - 0xc4, 0xcf, 0xe0, 0x4f, 0x70, 0xaf, 0x38, 0xf5, 0xc8, 0x11, 0x92, 0x3b, 0xbf, 0x01, 0xcd, 0x9b, - 0x75, 0xbc, 0xae, 0xac, 0xa8, 0xb7, 0xf7, 0xbe, 0xf7, 0xbd, 0x6f, 0xde, 0x7b, 0xfb, 0x66, 0x16, - 0x7d, 0x9b, 0x27, 0xf3, 0xd9, 0x32, 0x9e, 0x2f, 0x4c, 0x16, 0x67, 0xf3, 0x59, 0xb4, 0x10, 0x51, - 0x92, 0xcb, 0x38, 0x33, 0xba, 0x6e, 0xe7, 0xe1, 0xec, 0x2a, 0x5e, 0x8a, 0xa0, 0x06, 0x4d, 0x73, - 0x25, 0x8d, 0x3c, 0x79, 0x3c, 0x8f, 0xcd, 0xa2, 0x08, 0xa7, 0x91, 0x4c, 0x67, 0x73, 0x39, 0x97, - 0x33, 0x80, 0xc3, 0xe2, 0x0a, 0x3c, 0x70, 0xc0, 0x72, 0xf4, 0xf1, 0x7f, 0x1e, 0x1a, 0x9c, 0x6d, - 0x44, 0x5e, 0x48, 0x2e, 0x96, 0xf8, 0x1c, 0x75, 0x6b, 0xc2, 0xc4, 0x1b, 0xf9, 0x93, 0xee, 0x93, - 0xf1, 0xf4, 0x5d, 0x5e, 0x1d, 0x78, 0x96, 0x19, 0xb5, 0xa2, 0xf5, 0x34, 0xfc, 0x0d, 0xba, 0x67, - 0x98, 0x4e, 0x6a, 0x35, 0x92, 0xc6, 0xc8, 0x9b, 0x74, 0x9f, 0x1c, 0x4f, 0x5f, 0x32, 0x9d, 0x6c, - 0x92, 0x41, 0x8c, 0x1e, 0x9a, 0x2d, 0xf0, 0xe4, 0xa7, 0xad, 0xc2, 0x40, 0x1f, 0x0f, 0x90, 0x9f, - 0x88, 0x15, 0xf1, 0x46, 0xde, 0xa4, 0x43, 0xad, 0x89, 0x1f, 0xa1, 0x66, 0xc9, 0x96, 0x85, 0xa8, - 0xa4, 0xef, 0x4f, 0x5f, 0xb2, 0x70, 0x29, 0xde, 0xd5, 0x76, 0x9c, 0xa7, 0x8d, 0xaf, 0xbc, 0xf1, - 0x1f, 0x0d, 0xf4, 0xc1, 0x8e, 0xe3, 0xf1, 0x87, 0xe8, 0x00, 0xaa, 0x8d, 0x39, 0xc8, 0xfb, 0xb4, - 0x65, 0xdd, 0xe7, 0x1c, 0x7f, 0x8a, 0x90, 0x96, 0x85, 0x8a, 0x44, 0xc0, 0x63, 0x05, 0xc7, 0x74, - 0x68, 0xc7, 0x21, 0xe7, 0xb1, 0xc2, 0x04, 0x1d, 0x84, 0x2c, 0x4a, 0x44, 0xc6, 0x89, 0x0f, 0xb1, - 0xb5, 0x8b, 0x1f, 0xa2, 0x7e, 0x9c, 0xe6, 0x52, 0x19, 0xa1, 0x02, 0xc6, 0xb9, 0x22, 0xfb, 0x10, - 0xef, 0xad, 0xc1, 0xef, 0x38, 0x57, 0xf8, 0x63, 0xd4, 0x31, 0x31, 0x0f, 0x83, 0x85, 0xd4, 0x86, - 0x34, 0x81, 0xd0, 0xb6, 0xc0, 0xf7, 0x52, 0x9b, 0xdb, 0xa0, 0xe5, 0x93, 0xd6, 0xc8, 0x9b, 0x34, - 0x5d, 0xf0, 0x52, 0x2a, 0x63, 0x0b, 0xce, 0xb9, 0x13, 0x3e, 0x80, 0xbc, 0x56, 0xce, 0x41, 0x72, - 0x8c, 0xfa, 0xda, 0x1e, 0xc0, 0x83, 0xa4, 0x84, 0x9a, 0xdb, 0x10, 0xee, 0x3a, 0xf0, 0xa2, 0xb4, - 0x55, 0x3f, 0x44, 0xfd, 0xdb, 0x1d, 0x0b, 0x4a, 0xa1, 0x48, 0xc7, 0xd5, 0x76, 0x0b, 0xfe, 0x2c, - 0xd4, 0xf8, 0xb7, 0x06, 0x3a, 0xde, 0x35, 0x4e, 0x8c, 0xd1, 0xfe, 0x82, 0xe9, 0x05, 0x0c, 0xaa, - 0x47, 0xc1, 0xc6, 0x0f, 0x50, 0x4b, 0x1b, 0x66, 0x0a, 0x0d, 0x63, 0xe8, 0xd3, 0xca, 0xb3, 0xe3, - 0x63, 0xcb, 0xa5, 0x8c, 0x82, 0x90, 0x69, 0x01, 0x23, 0xf0, 0x69, 0x07, 0x90, 0x53, 0xa6, 0x05, - 0xfe, 0x1a, 0x1d, 0x88, 0x6c, 0x1e, 0x67, 0x42, 0x93, 0x76, 0xb5, 0x66, 0xbb, 0x8e, 0x9c, 0x3e, - 0x73, 0x24, 0xb7, 0x66, 0xeb, 0x14, 0x3b, 0x7c, 0x63, 0xd9, 0xcf, 0xcf, 0xa1, 0x01, 0x9f, 0xae, - 0xdd, 0x13, 0x8a, 0x7a, 0xf5, 0x94, 0xfa, 0xe6, 0x1c, 0xb9, 0xcd, 0xf9, 0x62, 0x7b, 0x73, 0x1e, - 0x54, 0x47, 0xdc, 0xb1, 0x3a, 0x7f, 0x7a, 0xe8, 0xfe, 0x4e, 0x52, 0xad, 0x79, 0x6f, 0xab, 0xf9, - 0xa7, 0xa8, 0x15, 0x2d, 0x8a, 0x2c, 0xd1, 0xa4, 0x51, 0x35, 0xb7, 0x33, 0x7f, 0x7a, 0x06, 0x24, - 0xd7, 0x5c, 0x95, 0x71, 0x72, 0x89, 0xba, 0x35, 0xf8, 0x7d, 0x56, 0x1f, 0xe8, 0x77, 0xd4, 0xff, - 0x97, 0x8f, 0x8e, 0x77, 0x71, 0xec, 0xf7, 0xcc, 0x99, 0x59, 0x54, 0xe2, 0x60, 0xdb, 0x96, 0xe4, - 0xd5, 0x95, 0x16, 0xee, 0xd2, 0xfa, 0xb4, 0xf2, 0xf0, 0x63, 0x84, 0x23, 0xb9, 0x2c, 0xd2, 0x2c, - 0xc8, 0x85, 0x4a, 0x0b, 0xc3, 0x4c, 0x2c, 0x33, 0xd2, 0x1b, 0xf9, 0x93, 0x26, 0x3d, 0x72, 0x91, - 0xcb, 0x4d, 0xc0, 0x7e, 0x7e, 0x91, 0xf1, 
0xa0, 0x92, 0x6a, 0xba, 0xcf, 0x2f, 0x32, 0xfe, 0x83, - 0x53, 0x1b, 0x20, 0x3f, 0x97, 0x1a, 0x76, 0xdb, 0xa7, 0xd6, 0xc4, 0x9f, 0xa3, 0xc3, 0x5c, 0x89, - 0x32, 0x50, 0xf2, 0xd7, 0x98, 0x07, 0x29, 0x7b, 0x05, 0xdb, 0xed, 0xd3, 0x9e, 0x45, 0xa9, 0x05, - 0x5f, 0xb0, 0x57, 0xf6, 0x66, 0x6c, 0x08, 0x6d, 0x20, 0xb4, 0x55, 0x2d, 0x98, 0x94, 0x51, 0x10, - 0xae, 0x8c, 0xd0, 0xb0, 0x17, 0xfb, 0xb4, 0x9d, 0x94, 0xd1, 0xa9, 0xf5, 0xed, 0xb5, 0xb1, 0xc1, - 0xa4, 0xd4, 0x04, 0x41, 0xa8, 0x95, 0x94, 0xd1, 0x45, 0xa9, 0xf1, 0x67, 0xa8, 0x67, 0x03, 0xf0, - 0x5a, 0xe9, 0x22, 0x25, 0xdd, 0x91, 0x37, 0x69, 0xd1, 0x6e, 0x52, 0x46, 0x67, 0x15, 0x84, 0x3f, - 0xb1, 0xf7, 0x31, 0x15, 0xda, 0xb0, 0x34, 0x27, 0xfd, 0x91, 0x37, 0x19, 0xd0, 0x0d, 0x60, 0xa7, - 0x68, 0x56, 0xb9, 0x20, 0x87, 0x70, 0x51, 0xc1, 0xc6, 0x23, 0xd4, 0x8d, 0x64, 0x9a, 0x2b, 0xa1, - 0xb5, 0x1d, 0xd3, 0x3d, 0x08, 0xd5, 0x21, 0xfc, 0x11, 0x6a, 0xdb, 0x8b, 0x19, 0xd8, 0x8f, 0x3b, - 0x70, 0x0f, 0x88, 0xf5, 0x2f, 0xc4, 0xca, 0xf6, 0x01, 0x8f, 0xbc, 0x8e, 0x5f, 0x0b, 0x72, 0xe4, - 0x9a, 0xb4, 0xc0, 0x8f, 0xf1, 0x6b, 0x71, 0xfa, 0xe8, 0xcd, 0xbf, 0xc3, 0xbd, 0x37, 0xd7, 0x43, - 0xef, 0xed, 0xf5, 0xd0, 0xfb, 0xe7, 0x7a, 0xe8, 0xfd, 0x7e, 0x33, 0xdc, 0x7b, 0x7b, 0x33, 0xdc, - 0xfb, 0xfb, 0x66, 0xb8, 0xf7, 0x4b, 0x7f, 0xeb, 0x5f, 0x11, 0xb6, 0xe0, 0xb1, 0xff, 0xf2, 0xff, - 0x00, 0x00, 0x00, 0xff, 0xff, 0x7b, 0xd6, 0xd3, 0x15, 0x5d, 0x06, 0x00, 0x00, + // 870 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xcd, 0x6e, 0xdb, 0x46, + 0x10, 0x36, 0x45, 0x8b, 0x92, 0x96, 0x92, 0x23, 0x6f, 0xed, 0x64, 0xeb, 0xb6, 0x2a, 0xab, 0xf4, + 0x20, 0x20, 0x8d, 0x04, 0xa4, 0x97, 0x22, 0x68, 0x0b, 0xd4, 0x76, 0x80, 0x06, 0x46, 0x50, 0x63, + 0x9b, 0xf6, 0xd0, 0x0b, 0xc1, 0x9f, 0xb5, 0x44, 0xac, 0xc8, 0x25, 0xb8, 0x4b, 0x36, 0xca, 0x53, + 0xf4, 0x31, 0xfa, 0x12, 0xbd, 0x07, 0x3d, 0xe5, 0xd8, 0x63, 0x6a, 0xdf, 0xfb, 0x0c, 0xc5, 0xce, + 0x52, 0x16, 0x15, 0x08, 0x46, 0x6e, 0x33, 0xdf, 0x7c, 0x3b, 0x3b, 0xfb, 0xe9, 0x1b, 0x11, 0x7d, + 0x9f, 0xf3, 0xf9, 0x6c, 0x99, 0xcc, 0x17, 0x2a, 0x4b, 0xb2, 0xf9, 0x2c, 0x5a, 0xb0, 0x88, 0xe7, + 0x22, 0xc9, 0x94, 0x6c, 0xc6, 0x79, 0x38, 0xbb, 0x4a, 0x96, 0xcc, 0x6f, 0x40, 0xd3, 0xbc, 0x10, + 0x4a, 0x9c, 0x3c, 0x9e, 0x27, 0x6a, 0x51, 0x86, 0xd3, 0x48, 0xa4, 0xb3, 0xb9, 0x98, 0x8b, 0x19, + 0xc0, 0x61, 0x79, 0x05, 0x19, 0x24, 0x10, 0x19, 0xfa, 0xf8, 0x3f, 0x0b, 0x0d, 0xcf, 0x36, 0x4d, + 0x5e, 0x88, 0x98, 0x2d, 0xf1, 0x39, 0x72, 0x1b, 0x8d, 0x89, 0xe5, 0xd9, 0x13, 0xf7, 0xc9, 0x78, + 0xfa, 0x3e, 0xaf, 0x09, 0x3c, 0xcb, 0x54, 0xb1, 0xa2, 0xcd, 0x63, 0xf8, 0x3b, 0x74, 0x4f, 0x05, + 0x92, 0x37, 0x66, 0x24, 0x2d, 0xcf, 0x9a, 0xb8, 0x4f, 0x8e, 0xa6, 0x2f, 0x03, 0xc9, 0x37, 0x87, + 0xa1, 0x19, 0x3d, 0x50, 0x5b, 0xe0, 0xc9, 0x2f, 0x5b, 0x83, 0x41, 0x7f, 0x3c, 0x44, 0x36, 0x67, + 0x2b, 0x62, 0x79, 0xd6, 0xa4, 0x47, 0x75, 0x88, 0x1f, 0xa1, 0x76, 0x15, 0x2c, 0x4b, 0x56, 0xb7, + 0x3e, 0x9e, 0xbe, 0x0c, 0xc2, 0x25, 0x7b, 0xbf, 0xb7, 0xe1, 0x3c, 0x6d, 0x7d, 0x63, 0x8d, 0xff, + 0x6c, 0xa1, 0x8f, 0x76, 0x5c, 0x8f, 0x1f, 0xa0, 0x0e, 0x4c, 0x9b, 0xc4, 0xd0, 0xde, 0xa6, 0x8e, + 0x4e, 0x9f, 0xc7, 0xf8, 0x33, 0x84, 0xa4, 0x28, 0x8b, 0x88, 0xf9, 0x71, 0x52, 0xc0, 0x35, 0x3d, + 0xda, 0x33, 0xc8, 0x79, 0x52, 0x60, 0x82, 0x3a, 0x61, 0x10, 0x71, 0x96, 0xc5, 0xc4, 0x86, 0xda, + 0x3a, 0xc5, 0x0f, 0xd1, 0x20, 0x49, 0x73, 0x51, 0x28, 0x56, 0xf8, 0x41, 0x1c, 0x17, 0x64, 0x1f, + 0xea, 0xfd, 0x35, 0xf8, 0x43, 0x1c, 0x17, 0xf8, 0x13, 0xd4, 0x53, 0x49, 0x1c, 0xfa, 0x0b, 0x21, + 0x15, 0x69, 0x03, 0xa1, 0xab, 0x81, 
0x1f, 0x85, 0x54, 0xb7, 0x45, 0xcd, 0x27, 0x8e, 0x67, 0x4d, + 0xda, 0xa6, 0x78, 0x29, 0x0a, 0xa5, 0x07, 0xce, 0x63, 0xd3, 0xb8, 0x03, 0xe7, 0x9c, 0x3c, 0x86, + 0x96, 0x63, 0x34, 0x90, 0xfa, 0x82, 0xd8, 0xe7, 0x15, 0xcc, 0xdc, 0x85, 0xb2, 0x6b, 0xc0, 0x8b, + 0x4a, 0x4f, 0xfd, 0x10, 0x0d, 0x6e, 0x3d, 0xe6, 0x57, 0xac, 0x20, 0x3d, 0x33, 0xdb, 0x2d, 0xf8, + 0x2b, 0x2b, 0xc6, 0xef, 0x5a, 0xe8, 0x68, 0x97, 0x9c, 0x18, 0xa3, 0xfd, 0x45, 0x20, 0x17, 0x20, + 0x54, 0x9f, 0x42, 0x8c, 0xef, 0x23, 0x47, 0xaa, 0x40, 0x95, 0x12, 0x64, 0x18, 0xd0, 0x3a, 0xd3, + 0xf2, 0x05, 0xcb, 0xa5, 0x88, 0xfc, 0x30, 0x90, 0x0c, 0x24, 0xb0, 0x69, 0x0f, 0x90, 0xd3, 0x40, + 0x32, 0xfc, 0x2d, 0xea, 0xb0, 0x6c, 0x9e, 0x64, 0x4c, 0x92, 0x6e, 0x6d, 0xb3, 0x5d, 0x57, 0x4e, + 0x9f, 0x19, 0x92, 0xb1, 0xd9, 0xfa, 0x88, 0x16, 0x5f, 0x69, 0xf6, 0xf3, 0x73, 0x78, 0x80, 0x4d, + 0xd7, 0x29, 0xfe, 0x18, 0x75, 0x79, 0xe5, 0x87, 0x2b, 0xc5, 0x24, 0x41, 0x9e, 0x35, 0xd9, 0xa7, + 0x1d, 0x5e, 0x9d, 0xea, 0x14, 0x1f, 0x23, 0x87, 0x57, 0x3e, 0xaf, 0x24, 0x71, 0xa1, 0xd0, 0xe6, + 0xd5, 0x45, 0x25, 0xf1, 0xe7, 0xc8, 0xe5, 0x95, 0x31, 0xab, 0x2c, 0x53, 0xd2, 0xf7, 0xac, 0x89, + 0x43, 0x11, 0xaf, 0xce, 0x6a, 0xe4, 0x84, 0xa2, 0x7e, 0x73, 0x8a, 0xa6, 0x19, 0x0f, 0x8d, 0x19, + 0xbf, 0xda, 0x36, 0xe3, 0xfd, 0x7a, 0xea, 0x3b, 0xdc, 0xf8, 0x97, 0x85, 0x8e, 0x77, 0x92, 0x1a, + 0x7a, 0x5a, 0x5b, 0x7a, 0x3e, 0x45, 0x4e, 0xb4, 0x28, 0x33, 0x2e, 0x49, 0xab, 0xd6, 0x6b, 0xe7, + 0xf9, 0xe9, 0x19, 0x90, 0x8c, 0x5e, 0xf5, 0x89, 0x93, 0x4b, 0xe4, 0x36, 0xe0, 0x0f, 0xd9, 0x26, + 0xa0, 0xdf, 0x31, 0xff, 0xdf, 0x36, 0x3a, 0xda, 0xc5, 0xd1, 0x16, 0xc9, 0x03, 0xb5, 0xa8, 0x9b, + 0x43, 0xac, 0x9f, 0x24, 0xae, 0xae, 0x24, 0x33, 0xff, 0x03, 0x36, 0xad, 0x33, 0xfc, 0x18, 0xe1, + 0x48, 0x2c, 0xcb, 0x34, 0xf3, 0x73, 0x56, 0xa4, 0xa5, 0x0a, 0x54, 0x22, 0x32, 0xd2, 0xf7, 0xec, + 0x49, 0x9b, 0x1e, 0x9a, 0xca, 0xe5, 0xa6, 0xa0, 0x1d, 0xc5, 0xb2, 0xd8, 0xaf, 0x5b, 0xb5, 0x8d, + 0xa3, 0x58, 0x16, 0xff, 0x64, 0xba, 0x0d, 0x91, 0x9d, 0x0b, 0x09, 0xeb, 0x62, 0x53, 0x1d, 0xe2, + 0x2f, 0xd1, 0x41, 0x5e, 0xb0, 0xca, 0x2f, 0xc4, 0xef, 0x49, 0xec, 0xa7, 0xc1, 0x2b, 0x58, 0x18, + 0x9b, 0xf6, 0x35, 0x4a, 0x35, 0xf8, 0x22, 0x78, 0xa5, 0x97, 0x6d, 0x43, 0xe8, 0x02, 0xa1, 0x5b, + 0x34, 0x8a, 0xbc, 0x8a, 0x6a, 0x3f, 0xf5, 0xc0, 0x36, 0x5d, 0x5e, 0x45, 0xc6, 0x50, 0x0f, 0x50, + 0x47, 0x17, 0xb5, 0xa3, 0x8c, 0xd5, 0x1c, 0x5e, 0x45, 0xda, 0x52, 0x5f, 0xa0, 0xbe, 0x2e, 0xdc, + 0x7a, 0xca, 0x05, 0x4f, 0xb9, 0xbc, 0x8a, 0xd6, 0xa6, 0xc2, 0x9f, 0xea, 0x15, 0x4f, 0x99, 0x54, + 0x41, 0x9a, 0x93, 0x81, 0x67, 0x4d, 0x86, 0x74, 0x03, 0x68, 0x15, 0xd5, 0x2a, 0x67, 0xe4, 0x00, + 0x76, 0x1f, 0x62, 0xec, 0x21, 0x37, 0x12, 0x69, 0x5e, 0x30, 0x29, 0xb5, 0x4c, 0xf7, 0xa0, 0xd4, + 0x84, 0xb4, 0xf7, 0xf5, 0xae, 0xfb, 0xfa, 0xc7, 0x1d, 0x9a, 0xff, 0x24, 0x9d, 0x5f, 0xb0, 0x95, + 0x7e, 0x07, 0x7c, 0x37, 0x64, 0xf2, 0x9a, 0x91, 0x43, 0xf3, 0x48, 0x0d, 0xfc, 0x9c, 0xbc, 0x66, + 0xa7, 0x8f, 0xde, 0xfc, 0x3b, 0xda, 0x7b, 0x73, 0x3d, 0xb2, 0xde, 0x5e, 0x8f, 0xac, 0x77, 0xd7, + 0x23, 0xeb, 0x8f, 0x9b, 0xd1, 0xde, 0xdb, 0x9b, 0xd1, 0xde, 0x3f, 0x37, 0xa3, 0xbd, 0xdf, 0x06, + 0x5b, 0x9f, 0x9f, 0xd0, 0x81, 0xef, 0xc7, 0xd7, 0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0x8b, 0xff, + 0xf2, 0x75, 0xb0, 0x06, 0x00, 0x00, } func (m *CheckpointsModel) Marshal() (dAtA []byte, err error) { @@ -473,6 +479,22 @@ func (m *TableCheckpointModel) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.KvChecksum != 0 { + i -= 8 + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(m.KvChecksum)) + i-- + dAtA[i] = 0x61 + 
} + if m.KvKvs != 0 { + i = encodeVarintFileCheckpoints(dAtA, i, uint64(m.KvKvs)) + i-- + dAtA[i] = 0x58 + } + if m.KvBytes != 0 { + i = encodeVarintFileCheckpoints(dAtA, i, uint64(m.KvBytes)) + i-- + dAtA[i] = 0x50 + } if m.TableID != 0 { i = encodeVarintFileCheckpoints(dAtA, i, uint64(m.TableID)) i-- @@ -810,6 +832,15 @@ func (m *TableCheckpointModel) Size() (n int) { if m.TableID != 0 { n += 1 + sovFileCheckpoints(uint64(m.TableID)) } + if m.KvBytes != 0 { + n += 1 + sovFileCheckpoints(uint64(m.KvBytes)) + } + if m.KvKvs != 0 { + n += 1 + sovFileCheckpoints(uint64(m.KvKvs)) + } + if m.KvChecksum != 0 { + n += 9 + } return n } @@ -1675,6 +1706,54 @@ func (m *TableCheckpointModel) Unmarshal(dAtA []byte) error { break } } + case 10: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field KvBytes", wireType) + } + m.KvBytes = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowFileCheckpoints + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.KvBytes |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 11: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field KvKvs", wireType) + } + m.KvKvs = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowFileCheckpoints + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.KvKvs |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 12: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field KvChecksum", wireType) + } + m.KvChecksum = 0 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + m.KvChecksum = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 default: iNdEx = preIndex skippy, err := skipFileCheckpoints(dAtA[iNdEx:]) diff --git a/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.proto b/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.proto index 261a405da..9f8708a1f 100644 --- a/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.proto +++ b/pkg/lightning/checkpoints/checkpointspb/file_checkpoints.proto @@ -42,6 +42,9 @@ message TableCheckpointModel { int64 alloc_base = 4; map engines = 8; int64 tableID = 9; + uint64 kv_bytes = 10; + uint64 kv_kvs = 11; + fixed64 kv_checksum = 12; } message EngineCheckpointModel { @@ -67,4 +70,3 @@ message ChunkCheckpointModel { string sort_key = 16; int64 file_size = 17; } - diff --git a/pkg/lightning/common/storage_unix.go b/pkg/lightning/common/storage_unix.go index fcae82db7..3da84afa7 100644 --- a/pkg/lightning/common/storage_unix.go +++ b/pkg/lightning/common/storage_unix.go @@ -50,7 +50,7 @@ func GetStorageSize(dir string) (size StorageSize, err error) { } // Available blocks * size per block = available space in bytes - size.Available = stat.Bavail * bSize + size.Available = uint64(stat.Bavail) * bSize size.Capacity = stat.Blocks * bSize return diff --git a/pkg/lightning/common/util.go b/pkg/lightning/common/util.go index c0b6b17c6..c0ea2622e 100644 --- a/pkg/lightning/common/util.go +++ b/pkg/lightning/common/util.go @@ -99,9 +99,21 @@ func IsEmptyDir(name string) bool { return len(entries) == 0 } +type QueryExecutor interface { + QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) + QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row +} + +type DBExecutor interface { + QueryExecutor + BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) + 
ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) +} + // SQLWithRetry constructs a retryable transaction. type SQLWithRetry struct { - DB *sql.DB + // either *sql.DB or *sql.Conn + DB DBExecutor Logger log.Logger HideQueryLog bool } @@ -130,6 +142,7 @@ outside: logger.Warn(purpose+" failed but going to try again", log.ShortError(err)) continue default: + logger.Warn(purpose+" failed with no retry", log.ShortError(err)) break outside } } @@ -261,6 +274,13 @@ func UniqueTable(schema string, table string) string { return builder.String() } +// EscapeIdentifier quote and escape an sql identifier +func EscapeIdentifier(identifier string) string { + var builder strings.Builder + WriteMySQLIdentifier(&builder, identifier) + return builder.String() +} + // Writes a MySQL identifier into the string builder. // The identifier is always escaped into the form "`foo`". func WriteMySQLIdentifier(builder *strings.Builder, identifier string) { diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 3803c05d6..c649de098 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -71,6 +71,9 @@ const ( defaultIndexSerialScanConcurrency = 20 defaultChecksumTableConcurrency = 2 + // defaultMetaSchemaName is the default database name used to store lightning metadata + defaultMetaSchemaName = "lightning_metadata" + // autoDiskQuotaLocalReservedSpeed is the estimated size increase per // millisecond per write thread the local backend may gain on all engines. // This is used to compute the maximum size overshoot between two disk quota @@ -148,11 +151,12 @@ func (cfg *Config) ToTLS() (*common.TLS, error) { } type Lightning struct { - TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"` - IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"` - RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"` - IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"` - CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` + TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"` + IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"` + RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"` + IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"` + CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` + MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"` } type PostOpLevel int @@ -656,6 +660,9 @@ func (cfg *Config) DefaultVarsForImporterAndLocalBackend() { if cfg.App.TableConcurrency == 0 { cfg.App.TableConcurrency = 6 } + if len(cfg.App.MetaSchemaName) == 0 { + cfg.App.MetaSchemaName = defaultMetaSchemaName + } if cfg.TikvImporter.RangeConcurrency == 0 { cfg.TikvImporter.RangeConcurrency = 16 } diff --git a/pkg/lightning/lightning.go b/pkg/lightning/lightning.go index c44b5caa6..128202e6b 100755 --- a/pkg/lightning/lightning.go +++ b/pkg/lightning/lightning.go @@ -672,7 +672,7 @@ func checkSystemRequirement(cfg *config.Config, dbsMeta []*mydump.MDDatabaseMeta // region-concurrency: number of LocalWriters writing SST files. // 2*totalSize/memCacheSize: number of Pebble MemCache files. 
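The estimate below now counts in local.Rlim_t instead of uint64 so it matches VerifyRLimit's new signature. A quick worked example of the same expression with made-up sizes, using plain uint64 (what Rlim_t aliases to on the generic Unix build); the 512 MiB engine cache is an assumed typical value, not read from this patch:

package main

import "fmt"

func main() {
	// Hypothetical inputs for the estimate computed in checkSystemRequirement.
	const (
		regionConcurrency  = 16        // cfg.App.RegionConcurrency
		topNTotalSize      = 50 << 30  // combined size of the largest tables, 50 GiB
		engineMemCacheSize = 512 << 20 // cfg.TikvImporter.EngineMemCacheSize, assumed 512 MiB
	)
	// Same shape as the real expression: writers plus two MemCache files per cache-sized chunk.
	estimateMaxFiles := uint64(regionConcurrency) + uint64(topNTotalSize)/uint64(engineMemCacheSize)*2
	fmt.Println(estimateMaxFiles) // 216, the value handed to local.VerifyRLimit
}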
- estimateMaxFiles := uint64(cfg.App.RegionConcurrency) + uint64(topNTotalSize)/uint64(cfg.TikvImporter.EngineMemCacheSize)*2 + estimateMaxFiles := local.Rlim_t(cfg.App.RegionConcurrency) + local.Rlim_t(topNTotalSize)/local.Rlim_t(cfg.TikvImporter.EngineMemCacheSize)*2 if err := local.VerifyRLimit(estimateMaxFiles); err != nil { return err } diff --git a/pkg/lightning/restore/checksum_test.go b/pkg/lightning/restore/checksum_test.go index 2ca86f537..365f8037a 100644 --- a/pkg/lightning/restore/checksum_test.go +++ b/pkg/lightning/restore/checksum_test.go @@ -399,7 +399,7 @@ type mockChecksumKVClient struct { } // a mock client for checksum request -func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars *kv.Variables, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response { +func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars interface{}, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response { if c.curErrCount < c.maxErrCount { c.curErrCount++ return &mockErrorResponse{err: "tikv timeout"} diff --git a/pkg/lightning/restore/meta_manager.go b/pkg/lightning/restore/meta_manager.go new file mode 100644 index 000000000..bbef6fa6e --- /dev/null +++ b/pkg/lightning/restore/meta_manager.go @@ -0,0 +1,809 @@ +// Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0. + +package restore + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/parser/model" + "github.com/pingcap/parser/mysql" + "go.uber.org/zap" + + "github.com/pingcap/br/pkg/lightning/backend/tidb" + "github.com/pingcap/br/pkg/lightning/common" + "github.com/pingcap/br/pkg/lightning/log" + verify "github.com/pingcap/br/pkg/lightning/verification" + "github.com/pingcap/br/pkg/pdutil" + "github.com/pingcap/br/pkg/redact" +) + +type metaMgrBuilder interface { + Init(ctx context.Context) error + TaskMetaMgr(pd *pdutil.PdController) taskMetaMgr + TableMetaMgr(tr *TableRestore) tableMetaMgr +} + +type dbMetaMgrBuilder struct { + db *sql.DB + taskID int64 + schema string +} + +func (b *dbMetaMgrBuilder) Init(ctx context.Context) error { + exec := common.SQLWithRetry{ + DB: b.db, + Logger: log.L(), + HideQueryLog: redact.NeedRedact(), + } + metaDBSQL := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", common.EscapeIdentifier(b.schema)) + if err := exec.Exec(ctx, "create meta schema", metaDBSQL); err != nil { + return errors.Annotate(err, "create meta schema failed") + } + taskMetaSQL := fmt.Sprintf(CreateTaskMetaTable, common.UniqueTable(b.schema, taskMetaTableName)) + if err := exec.Exec(ctx, "create meta table", taskMetaSQL); err != nil { + return errors.Annotate(err, "create task meta table failed") + } + tableMetaSQL := fmt.Sprintf(CreateTableMetadataTable, common.UniqueTable(b.schema, tableMetaTableName)) + if err := exec.Exec(ctx, "create meta table", tableMetaSQL); err != nil { + return errors.Annotate(err, "create table meta table failed") + } + return nil +} + +func (b *dbMetaMgrBuilder) TaskMetaMgr(pd *pdutil.PdController) taskMetaMgr { + return &dbTaskMetaMgr{ + session: b.db, + taskID: b.taskID, + pd: pd, + tableName: common.UniqueTable(b.schema, taskMetaTableName), + schemaName: b.schema, + } +} + +func (b *dbMetaMgrBuilder) TableMetaMgr(tr *TableRestore) tableMetaMgr { + return &dbTableMetaMgr{ + session: b.db, + taskID: b.taskID, + tr: tr, + tableName: common.UniqueTable(b.schema, tableMetaTableName), + } +} + +type tableMetaMgr interface { + InitTableMeta(ctx 
context.Context) error + AllocTableRowIDs(ctx context.Context, rawRowIDMax int64) (*verify.KVChecksum, int64, error) + UpdateTableStatus(ctx context.Context, status metaStatus) error + UpdateTableBaseChecksum(ctx context.Context, checksum *verify.KVChecksum) error + CheckAndUpdateLocalChecksum(ctx context.Context, checksum *verify.KVChecksum) (bool, *verify.KVChecksum, error) + FinishTable(ctx context.Context) error +} + +type dbTableMetaMgr struct { + session *sql.DB + taskID int64 + tr *TableRestore + tableName string +} + +func (m *dbTableMetaMgr) InitTableMeta(ctx context.Context) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + // avoid override existing metadata if the meta is already inserted. + stmt := fmt.Sprintf(`INSERT IGNORE INTO %s (task_id, table_id, table_name, status) values (?, ?, ?, ?)`, m.tableName) + task := m.tr.logger.Begin(zap.DebugLevel, "init table meta") + err := exec.Exec(ctx, "init table meta", stmt, m.taskID, m.tr.tableInfo.ID, m.tr.tableName, metaStatusInitial.String()) + task.End(zap.ErrorLevel, err) + return errors.Trace(err) +} + +type metaStatus uint32 + +const ( + metaStatusInitial metaStatus = iota + metaStatusRowIDAllocated + metaStatusRestoreStarted + metaStatusRestoreFinished + metaStatusChecksuming + metaStatusChecksumSkipped + metaStatusFinished +) + +func (m metaStatus) String() string { + switch m { + case metaStatusInitial: + return "initialized" + case metaStatusRowIDAllocated: + return "allocated" + case metaStatusRestoreStarted: + return "restore" + case metaStatusRestoreFinished: + return "restore_finished" + case metaStatusChecksuming: + return "checksuming" + case metaStatusChecksumSkipped: + return "checksum_skipped" + case metaStatusFinished: + return "finish" + default: + panic(fmt.Sprintf("unexpected metaStatus value '%d'", m)) + } +} + +func parseMetaStatus(s string) (metaStatus, error) { + switch s { + case "", "initialized": + return metaStatusInitial, nil + case "allocated": + return metaStatusRowIDAllocated, nil + case "restore": + return metaStatusRestoreStarted, nil + case "restore_finished": + return metaStatusRestoreFinished, nil + case "checksuming": + return metaStatusChecksuming, nil + case "checksum_skipped": + return metaStatusChecksumSkipped, nil + case "finish": + return metaStatusFinished, nil + default: + return metaStatusInitial, errors.Errorf("invalid meta status '%s'", s) + } +} + +func (m *dbTableMetaMgr) AllocTableRowIDs(ctx context.Context, rawRowIDMax int64) (*verify.KVChecksum, int64, error) { + conn, err := m.session.Conn(ctx) + if err != nil { + return nil, 0, errors.Trace(err) + } + defer conn.Close() + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + var newRowIDBase, newRowIDMax int64 + curStatus := metaStatusInitial + newStatus := metaStatusRowIDAllocated + var baseTotalKvs, baseTotalBytes, baseChecksum uint64 + err = exec.Exec(ctx, "enable pessimistic transaction", "SET SESSION tidb_txn_mode = 'pessimistic';") + if err != nil { + return nil, 0, errors.Annotate(err, "enable pessimistic transaction failed") + } + needAutoID := common.TableHasAutoRowID(m.tr.tableInfo.Core) || m.tr.tableInfo.Core.GetAutoIncrementColInfo() != nil || m.tr.tableInfo.Core.ContainsAutoRandomBits() + err = exec.Transact(ctx, "init table allocator base", func(ctx context.Context, tx *sql.Tx) error { + query := fmt.Sprintf("SELECT task_id, row_id_base, row_id_max, total_kvs_base, total_bytes_base, checksum_base, status from %s WHERE table_id = ? 
FOR UPDATE", m.tableName) + rows, err := tx.QueryContext(ctx, query, m.tr.tableInfo.ID) + if err != nil { + return errors.Trace(err) + } + defer rows.Close() + var ( + metaTaskID, rowIDBase, rowIDMax, maxRowIDMax int64 + totalKvs, totalBytes, checksum uint64 + statusValue string + ) + for rows.Next() { + if err = rows.Scan(&metaTaskID, &rowIDBase, &rowIDMax, &totalKvs, &totalBytes, &checksum, &statusValue); err != nil { + return errors.Trace(err) + } + status, err := parseMetaStatus(statusValue) + if err != nil { + return errors.Annotatef(err, "invalid meta status '%s'", statusValue) + } + + // skip finished meta + if status >= metaStatusFinished { + continue + } + + if status == metaStatusChecksuming { + return errors.New("target table is calculating checksum, please wait unit the checksum is finished and try again.") + } + + if metaTaskID == m.taskID { + curStatus = status + baseChecksum = checksum + baseTotalKvs = totalKvs + baseTotalBytes = totalBytes + if status >= metaStatusRowIDAllocated { + if rowIDMax-rowIDBase != rawRowIDMax { + return errors.Errorf("verify allocator base failed. local: '%d', meta: '%d'", rawRowIDMax, rowIDMax-rowIDBase) + } + newRowIDBase = rowIDBase + newRowIDMax = rowIDMax + break + } + continue + } + + // other tasks has finished this logic, we needn't do again. + if status >= metaStatusRowIDAllocated { + newStatus = metaStatusRestoreStarted + } + + if rowIDMax > maxRowIDMax { + maxRowIDMax = rowIDMax + } + } + + // no enough info are available, fetch row_id max for table + if curStatus == metaStatusInitial { + if needAutoID && maxRowIDMax == 0 { + // NOTE: currently, if a table contains auto_incremental unique key and _tidb_rowid, + // the `show table next_row_id` will returns the unique key field only. + var autoIDField string + for _, col := range m.tr.tableInfo.Core.Columns { + if mysql.HasAutoIncrementFlag(col.Flag) { + autoIDField = col.Name.L + break + } else if mysql.HasPriKeyFlag(col.Flag) && m.tr.tableInfo.Core.AutoRandomBits > 0 { + autoIDField = col.Name.L + break + } + } + if len(autoIDField) == 0 && common.TableHasAutoRowID(m.tr.tableInfo.Core) { + autoIDField = model.ExtraHandleName.L + } + if len(autoIDField) == 0 { + return errors.Errorf("table %s contains auto increment id or _tidb_rowid, but target field not found", m.tr.tableName) + } + + autoIDInfos, err := tidb.FetchTableAutoIDInfos(ctx, tx, m.tr.tableName) + if err != nil { + return errors.Trace(err) + } + found := false + for _, info := range autoIDInfos { + if strings.ToLower(info.Column) == autoIDField { + maxRowIDMax = info.NextID - 1 + found = true + break + } + } + if !found { + return errors.Errorf("can't fetch previous auto id base for table %s field '%s'", m.tr.tableName, autoIDField) + } + } + newRowIDBase = maxRowIDMax + newRowIDMax = newRowIDBase + rawRowIDMax + // table contains no data, can skip checksum + if needAutoID && newRowIDBase == 0 && newStatus < metaStatusRestoreStarted { + newStatus = metaStatusRestoreStarted + } + query = fmt.Sprintf("update %s set row_id_base = ?, row_id_max = ?, status = ? where table_id = ? and task_id = ?", m.tableName) + _, err := tx.ExecContext(ctx, query, newRowIDBase, newRowIDMax, newStatus.String(), m.tr.tableInfo.ID, m.taskID) + if err != nil { + return errors.Trace(err) + } + + curStatus = newStatus + } + return nil + }) + if err != nil { + return nil, 0, errors.Trace(err) + } + + var checksum *verify.KVChecksum + // need to do checksum and update checksum meta since we are the first one. 
+ if curStatus < metaStatusRestoreStarted { + // table contains data but haven't do checksum yet + if (newRowIDBase > 0 || !needAutoID) && baseTotalKvs == 0 { + remoteCk, err := DoChecksum(ctx, m.tr.tableInfo) + if err != nil { + return nil, 0, errors.Trace(err) + } + + if remoteCk.Checksum != baseChecksum || remoteCk.TotalKVs != baseTotalKvs || remoteCk.TotalBytes != baseTotalBytes { + ck := verify.MakeKVChecksum(remoteCk.TotalBytes, remoteCk.TotalKVs, remoteCk.Checksum) + checksum = &ck + } + + } + + if checksum != nil { + if err = m.UpdateTableBaseChecksum(ctx, checksum); err != nil { + return nil, 0, errors.Trace(err) + } + + m.tr.logger.Info("checksum before restore table", zap.Object("checksum", checksum)) + } else if err = m.UpdateTableStatus(ctx, metaStatusRestoreStarted); err != nil { + return nil, 0, errors.Trace(err) + } + } + if checksum == nil && baseTotalKvs > 0 { + ck := verify.MakeKVChecksum(baseTotalBytes, baseTotalKvs, baseChecksum) + checksum = &ck + } + log.L().Info("allocate table row_id base", zap.String("table", m.tr.tableName), + zap.Int64("row_id_base", newRowIDBase)) + if checksum != nil { + log.L().Info("checksum base", zap.Any("checksum", checksum)) + } + return checksum, newRowIDBase, nil +} + +func (m *dbTableMetaMgr) UpdateTableBaseChecksum(ctx context.Context, checksum *verify.KVChecksum) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + query := fmt.Sprintf("update %s set total_kvs_base = ?, total_bytes_base = ?, checksum_base = ?, status = ? where table_id = ? and task_id = ?", m.tableName) + + return exec.Exec(ctx, "update base checksum", query, checksum.SumKVS(), + checksum.SumSize(), checksum.Sum(), metaStatusRestoreStarted.String(), m.tr.tableInfo.ID, m.taskID) +} + +func (m *dbTableMetaMgr) UpdateTableStatus(ctx context.Context, status metaStatus) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + query := fmt.Sprintf("update %s set status = ? where table_id = ? and task_id = ?", m.tableName) + return exec.Exec(ctx, "update meta status", query, status.String(), m.tr.tableInfo.ID, m.taskID) +} + +func (m *dbTableMetaMgr) CheckAndUpdateLocalChecksum(ctx context.Context, checksum *verify.KVChecksum) (bool, *verify.KVChecksum, error) { + conn, err := m.session.Conn(ctx) + if err != nil { + return false, nil, errors.Trace(err) + } + defer conn.Close() + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + err = exec.Exec(ctx, "enable pessimistic transaction", "SET SESSION tidb_txn_mode = 'pessimistic';") + if err != nil { + return false, nil, errors.Annotate(err, "enable pessimistic transaction failed") + } + var ( + baseTotalKvs, baseTotalBytes, baseChecksum uint64 + taskKvs, taskBytes, taskChecksum uint64 + totalKvs, totalBytes, totalChecksum uint64 + ) + newStatus := metaStatusChecksuming + needChecksum := true + err = exec.Transact(ctx, "checksum pre-check", func(ctx context.Context, tx *sql.Tx) error { + query := fmt.Sprintf("SELECT task_id, total_kvs_base, total_bytes_base, checksum_base, total_kvs, total_bytes, checksum, status from %s WHERE table_id = ? 
FOR UPDATE", m.tableName) + rows, err := tx.QueryContext(ctx, query, m.tr.tableInfo.ID) + if err != nil { + return errors.Annotate(err, "fetch task meta failed") + } + closed := false + defer func() { + if !closed { + rows.Close() + } + }() + var ( + taskID int64 + statusValue string + ) + for rows.Next() { + if err = rows.Scan(&taskID, &baseTotalKvs, &baseTotalBytes, &baseChecksum, &taskKvs, &taskBytes, &taskChecksum, &statusValue); err != nil { + return errors.Trace(err) + } + status, err := parseMetaStatus(statusValue) + if err != nil { + return errors.Annotatef(err, "invalid meta status '%s'", statusValue) + } + + // skip finished meta + if status >= metaStatusFinished { + continue + } + + if taskID == m.taskID { + if status >= metaStatusChecksuming { + newStatus = status + needChecksum = status == metaStatusChecksuming + return nil + } + + continue + } + + if status < metaStatusChecksuming { + newStatus = metaStatusChecksumSkipped + needChecksum = false + break + } else if status == metaStatusChecksuming { + return errors.New("another task is checksuming, there must be something wrong!") + } + + totalBytes += baseTotalBytes + totalKvs += baseTotalKvs + totalChecksum ^= baseChecksum + + totalBytes += taskBytes + totalKvs += taskKvs + totalChecksum ^= taskChecksum + } + rows.Close() + closed = true + + query = fmt.Sprintf("update %s set total_kvs = ?, total_bytes = ?, checksum = ?, status = ? where table_id = ? and task_id = ?", m.tableName) + _, err = tx.ExecContext(ctx, query, checksum.SumKVS(), checksum.SumSize(), checksum.Sum(), newStatus.String(), m.tr.tableInfo.ID, m.taskID) + return errors.Annotate(err, "update local checksum failed") + }) + if err != nil { + return false, nil, err + } + + var remoteChecksum *verify.KVChecksum + if needChecksum { + ck := verify.MakeKVChecksum(totalBytes, totalKvs, totalChecksum) + remoteChecksum = &ck + } + log.L().Info("check table checksum", zap.String("table", m.tr.tableName), + zap.Bool("checksum", needChecksum), zap.String("new_status", newStatus.String())) + return needChecksum, remoteChecksum, nil +} + +func (m *dbTableMetaMgr) FinishTable(ctx context.Context) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: m.tr.logger, + } + query := fmt.Sprintf("DELETE FROM %s where table_id = ? 
and (status = 'checksuming' or status = 'checksum_skipped')", m.tableName) + return exec.Exec(ctx, "clean up metas", query, m.tr.tableInfo.ID) +} + +type taskMetaMgr interface { + InitTask(ctx context.Context) error + CheckAndPausePdSchedulers(ctx context.Context) (pdutil.UndoFunc, error) + CheckAndFinishRestore(ctx context.Context) (bool, error) + Cleanup(ctx context.Context) error + CleanupAllMetas(ctx context.Context) error +} + +type dbTaskMetaMgr struct { + session *sql.DB + taskID int64 + pd *pdutil.PdController + // unique name of task meta table + tableName string + schemaName string +} + +type taskMetaStatus uint32 + +const ( + taskMetaStatusInitial taskMetaStatus = iota + taskMetaStatusScheduleSet + taskMetaStatusSwitchSkipped + taskMetaStatusSwitchBack +) + +func (m taskMetaStatus) String() string { + switch m { + case taskMetaStatusInitial: + return "initialized" + case taskMetaStatusScheduleSet: + return "schedule_set" + case taskMetaStatusSwitchSkipped: + return "skip_switch" + case taskMetaStatusSwitchBack: + return "switched" + default: + panic(fmt.Sprintf("unexpected metaStatus value '%d'", m)) + } +} + +func parseTaskMetaStatus(s string) (taskMetaStatus, error) { + switch s { + case "", "initialized": + return taskMetaStatusInitial, nil + case "schedule_set": + return taskMetaStatusScheduleSet, nil + case "skip_switch": + return taskMetaStatusSwitchSkipped, nil + case "switched": + return taskMetaStatusSwitchBack, nil + default: + return taskMetaStatusInitial, errors.Errorf("invalid meta status '%s'", s) + } +} + +type storedCfgs struct { + PauseCfg pdutil.ClusterConfig `json:"paused"` + RestoreCfg pdutil.ClusterConfig `json:"restore"` +} + +func (m *dbTaskMetaMgr) InitTask(ctx context.Context) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: log.L(), + } + // avoid override existing metadata if the meta is already inserted. 
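The storedCfgs struct above is what CheckAndPausePdSchedulers (further down) serializes into the task row's pd_cfgs column, so a later or recovering task can restore the original PD scheduler settings. A small round-trip sketch, with a simplified stand-in for pdutil.ClusterConfig since its real fields are not shown here:

package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// clusterConfig is only an illustrative stand-in for pdutil.ClusterConfig.
type clusterConfig struct {
	Schedulers []string `json:"schedulers"`
}

type storedCfgs struct {
	PauseCfg   clusterConfig `json:"paused"`
	RestoreCfg clusterConfig `json:"restore"`
}

func main() {
	cfg := storedCfgs{
		PauseCfg:   clusterConfig{Schedulers: []string{"balance-leader-scheduler"}},
		RestoreCfg: clusterConfig{Schedulers: []string{"balance-leader-scheduler", "balance-region-scheduler"}},
	}
	// What the pausing task would write into the pd_cfgs column...
	raw, err := json.Marshal(&cfg)
	if err != nil {
		log.Fatal(err)
	}
	// ...and what a recovering task reads back before undoing the pause.
	var recovered storedCfgs
	if err := json.Unmarshal(raw, &recovered); err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(raw))
	fmt.Println(recovered.RestoreCfg.Schedulers)
}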
+ stmt := fmt.Sprintf(`INSERT IGNORE INTO %s (task_id, status) values (?, ?)`, m.tableName) + err := exec.Exec(ctx, "init task meta", stmt, m.taskID, taskMetaStatusInitial.String()) + return errors.Trace(err) +} + +func (m *dbTaskMetaMgr) CheckAndPausePdSchedulers(ctx context.Context) (pdutil.UndoFunc, error) { + pauseCtx, cancel := context.WithCancel(ctx) + conn, err := m.session.Conn(ctx) + if err != nil { + return nil, errors.Trace(err) + } + defer conn.Close() + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: log.L(), + } + err = exec.Exec(ctx, "enable pessimistic transaction", "SET SESSION tidb_txn_mode = 'pessimistic';") + if err != nil { + return nil, errors.Annotate(err, "enable pessimistic transaction failed") + } + + needSwitch := true + paused := false + var pausedCfg storedCfgs + err = exec.Transact(ctx, "check and pause schedulers", func(ctx context.Context, tx *sql.Tx) error { + query := fmt.Sprintf("SELECT task_id, pd_cfgs, status from %s FOR UPDATE", m.tableName) + rows, err := tx.QueryContext(ctx, query) + if err != nil { + return errors.Annotate(err, "fetch task meta failed") + } + closed := false + defer func() { + if !closed { + rows.Close() + } + }() + var ( + taskID int64 + cfg string + statusValue string + ) + var cfgStr string + for rows.Next() { + if err = rows.Scan(&taskID, &cfg, &statusValue); err != nil { + return errors.Trace(err) + } + status, err := parseTaskMetaStatus(statusValue) + if err != nil { + return errors.Annotatef(err, "invalid task meta status '%s'", statusValue) + } + + if status == taskMetaStatusInitial { + continue + } + + if taskID == m.taskID { + if status >= taskMetaStatusSwitchSkipped { + needSwitch = false + return nil + } + } + + if cfg != "" { + cfgStr = cfg + break + } + } + if err = rows.Close(); err != nil { + return errors.Trace(err) + } + closed = true + + if cfgStr != "" { + err = json.Unmarshal([]byte(cfgStr), &pausedCfg) + return errors.Trace(err) + } + + orig, removed, err := m.pd.RemoveSchedulersWithOrigin(pauseCtx) + if err != nil { + return errors.Trace(err) + } + paused = true + + pausedCfg = storedCfgs{PauseCfg: removed, RestoreCfg: orig} + jsonByts, err := json.Marshal(&pausedCfg) + if err != nil { + return errors.Trace(err) + } + + query = fmt.Sprintf("update %s set pd_cfgs = ?, status = ? 
where task_id = ?", m.tableName) + _, err = tx.ExecContext(ctx, query, string(jsonByts), taskMetaStatusScheduleSet.String(), m.taskID) + + return errors.Annotate(err, "update task pd configs failed") + }) + if err != nil { + return nil, err + } + + if !needSwitch { + return nil, nil + } + + if !paused { + if err = m.pd.RemoveSchedulersWithCfg(pauseCtx, pausedCfg.PauseCfg); err != nil { + return nil, err + } + } + + cancelFunc := m.pd.MakeUndoFunctionByConfig(pausedCfg.RestoreCfg) + + return func(ctx context.Context) error { + // close the periodic task ctx + cancel() + return cancelFunc(ctx) + }, nil +} + +func (m *dbTaskMetaMgr) CheckAndFinishRestore(ctx context.Context) (bool, error) { + conn, err := m.session.Conn(ctx) + if err != nil { + return false, errors.Trace(err) + } + defer conn.Close() + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: log.L(), + } + err = exec.Exec(ctx, "enable pessimistic transaction", "SET SESSION tidb_txn_mode = 'pessimistic';") + if err != nil { + return false, errors.Annotate(err, "enable pessimistic transaction failed") + } + + switchBack := true + err = exec.Transact(ctx, "check and finish schedulers", func(ctx context.Context, tx *sql.Tx) error { + query := fmt.Sprintf("SELECT task_id, status from %s FOR UPDATE", m.tableName) + rows, err := tx.QueryContext(ctx, query) + if err != nil { + return errors.Annotate(err, "fetch task meta failed") + } + closed := false + defer func() { + if !closed { + rows.Close() + } + }() + var ( + taskID int64 + statusValue string + ) + newStatus := taskMetaStatusSwitchBack + for rows.Next() { + if err = rows.Scan(&taskID, &statusValue); err != nil { + return errors.Trace(err) + } + status, err := parseTaskMetaStatus(statusValue) + if err != nil { + return errors.Annotatef(err, "invalid task meta status '%s'", statusValue) + } + + if taskID == m.taskID { + continue + } + + if status < taskMetaStatusSwitchSkipped { + newStatus = taskMetaStatusSwitchSkipped + switchBack = false + break + } + } + if err = rows.Close(); err != nil { + return errors.Trace(err) + } + closed = true + + query = fmt.Sprintf("update %s set status = ? where task_id = ?", m.tableName) + _, err = tx.ExecContext(ctx, query, newStatus.String(), m.taskID) + + return errors.Trace(err) + }) + + return switchBack, err +} + +func (m *dbTaskMetaMgr) Cleanup(ctx context.Context) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: log.L(), + } + // avoid override existing metadata if the meta is already inserted. + stmt := fmt.Sprintf("DROP TABLE %s;", m.tableName) + if err := exec.Exec(ctx, "cleanup task meta tables", stmt); err != nil { + return errors.Trace(err) + } + return nil +} + +func (m *dbTaskMetaMgr) CleanupAllMetas(ctx context.Context) error { + exec := &common.SQLWithRetry{ + DB: m.session, + Logger: log.L(), + } + + // check if all tables are finished + query := fmt.Sprintf("SELECT COUNT(*) from %s", common.UniqueTable(m.schemaName, tableMetaTableName)) + var cnt int + if err := exec.QueryRow(ctx, "fetch table meta row count", query, &cnt); err != nil { + return errors.Trace(err) + } + if cnt > 0 { + log.L().Warn("there are unfinished table in table meta table, cleanup skipped.") + return nil + } + + // avoid override existing metadata if the meta is already inserted. 
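+ // Drop the whole lightning metadata schema; this branch is only reached when the
+ // table_meta row count checked above is zero, i.e. every table has already cleaned up its own row.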
+ stmt := fmt.Sprintf("DROP DATABASE %s;", common.EscapeIdentifier(m.schemaName)) + if err := exec.Exec(ctx, "cleanup task meta tables", stmt); err != nil { + return errors.Trace(err) + } + return nil +} + +type noopMetaMgrBuilder struct{} + +func (b noopMetaMgrBuilder) Init(ctx context.Context) error { + return nil +} + +func (b noopMetaMgrBuilder) TaskMetaMgr(pd *pdutil.PdController) taskMetaMgr { + return noopTaskMetaMgr{} +} + +func (b noopMetaMgrBuilder) TableMetaMgr(tr *TableRestore) tableMetaMgr { + return noopTableMetaMgr{} +} + +type noopTaskMetaMgr struct{} + +func (m noopTaskMetaMgr) InitTask(ctx context.Context) error { + return nil +} + +func (m noopTaskMetaMgr) CheckAndPausePdSchedulers(ctx context.Context) (pdutil.UndoFunc, error) { + return func(ctx context.Context) error { + return nil + }, nil +} + +func (m noopTaskMetaMgr) CheckAndFinishRestore(ctx context.Context) (bool, error) { + return false, nil +} + +func (m noopTaskMetaMgr) Cleanup(ctx context.Context) error { + return nil +} + +func (m noopTaskMetaMgr) CleanupAllMetas(ctx context.Context) error { + return nil +} + +type noopTableMetaMgr struct{} + +func (m noopTableMetaMgr) InitTableMeta(ctx context.Context) error { + return nil +} + +func (m noopTableMetaMgr) AllocTableRowIDs(ctx context.Context, rawRowIDMax int64) (*verify.KVChecksum, int64, error) { + return nil, 0, nil +} + +func (m noopTableMetaMgr) UpdateTableStatus(ctx context.Context, status metaStatus) error { + return nil +} + +func (m noopTableMetaMgr) UpdateTableBaseChecksum(ctx context.Context, checksum *verify.KVChecksum) error { + return nil +} + +func (m noopTableMetaMgr) CheckAndUpdateLocalChecksum(ctx context.Context, checksum *verify.KVChecksum) (bool, *verify.KVChecksum, error) { + return false, nil, nil +} + +func (m noopTableMetaMgr) FinishTable(ctx context.Context) error { + return nil +} diff --git a/pkg/lightning/restore/meta_manager_test.go b/pkg/lightning/restore/meta_manager_test.go new file mode 100644 index 000000000..bf2fcba38 --- /dev/null +++ b/pkg/lightning/restore/meta_manager_test.go @@ -0,0 +1,244 @@ +// Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0. + +package restore + +import ( + "context" + "database/sql" + "database/sql/driver" + + "github.com/DATA-DOG/go-sqlmock" + . 
"github.com/pingcap/check" + "github.com/pingcap/parser" + "github.com/pingcap/parser/ast" + "github.com/pingcap/parser/model" + "github.com/pingcap/tidb/ddl" + tmock "github.com/pingcap/tidb/util/mock" + "go.uber.org/zap" + + "github.com/pingcap/br/pkg/lightning/checkpoints" + "github.com/pingcap/br/pkg/lightning/common" + "github.com/pingcap/br/pkg/lightning/log" + "github.com/pingcap/br/pkg/lightning/verification" +) + +var _ = Suite(&metaMgrSuite{}) + +type metaMgrSuite struct { + dbHandle *sql.DB + mockDB sqlmock.Sqlmock + tr *TableRestore + mgr *dbTableMetaMgr + checksumMgr *testChecksumMgr +} + +func (s *metaMgrSuite) SetUpSuite(c *C) { + p := parser.New() + se := tmock.NewContext() + + node, err := p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin") + c.Assert(err, IsNil) + tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), int64(1)) + c.Assert(err, IsNil) + tableInfo.State = model.StatePublic + + schema := "test" + tb := "t1" + ti := &checkpoints.TidbTableInfo{ + ID: tableInfo.ID, + DB: schema, + Name: tb, + Core: tableInfo, + } + + tableName := common.UniqueTable(schema, tb) + logger := log.With(zap.String("table", tableName)) + s.tr = &TableRestore{ + tableName: tableName, + tableInfo: ti, + logger: logger, + } +} + +func (s *metaMgrSuite) SetUpTest(c *C) { + db, m, err := sqlmock.New() + c.Assert(err, IsNil) + + s.mgr = &dbTableMetaMgr{ + session: db, + taskID: 1, + tr: s.tr, + tableName: common.UniqueTable("test", tableMetaTableName), + } + s.mockDB = m + s.checksumMgr = &testChecksumMgr{} +} + +func (s *metaMgrSuite) TearDownTest(c *C) { + c.Assert(s.mockDB.ExpectationsWereMet(), IsNil) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsSingleTable(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(0), int64(0), uint64(0), uint64(0), uint64(0), "initialized"}, + } + nextID := int64(1) + updateArgs := []driver.Value{int64(0), int64(10), "restore", int64(1), int64(1)} + s.prepareMock(rows, &nextID, updateArgs, nil, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(0)) + c.Assert(ck, IsNil) + c.Assert(s.checksumMgr.callCnt, Equals, 0) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsSingleTableAutoIDNot0(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(0), int64(0), uint64(0), uint64(0), uint64(0), "initialized"}, + } + nextID := int64(999) + updateArgs := []driver.Value{int64(998), int64(1008), "allocated", int64(1), int64(1)} + newStatus := "restore" + s.prepareMock(rows, &nextID, updateArgs, nil, &newStatus) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(998)) + c.Assert(ck, IsNil) + c.Assert(s.checksumMgr.callCnt, Equals, 1) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsSingleTableContainsData(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(0), int64(0), uint64(0), uint64(0), uint64(0), "initialized"}, + } + nextID := int64(999) + checksum := verification.MakeKVChecksum(1, 2, 3) + updateArgs := []driver.Value{int64(998), int64(1008), "allocated", int64(1), int64(1)} + s.prepareMock(rows, &nextID, updateArgs, &checksum, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + 
c.Assert(rowIDBase, Equals, int64(998)) + c.Assert(ck, DeepEquals, &checksum) + c.Assert(s.checksumMgr.callCnt, Equals, 1) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsAllocated(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(998), int64(1008), uint64(0), uint64(0), uint64(0), metaStatusRowIDAllocated.String()}, + } + checksum := verification.MakeKVChecksum(2, 1, 3) + s.prepareMock(rows, nil, nil, &checksum, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(998)) + c.Assert(ck, DeepEquals, &checksum) + c.Assert(s.checksumMgr.callCnt, Equals, 1) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsFinished(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(998), int64(1008), uint64(1), uint64(2), uint64(3), metaStatusRestoreStarted.String()}, + } + checksum := verification.MakeKVChecksum(2, 1, 3) + s.prepareMock(rows, nil, nil, nil, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(998)) + c.Assert(ck, DeepEquals, &checksum) + c.Assert(s.checksumMgr.callCnt, Equals, 0) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsMultiTasksInit(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(0), int64(0), uint64(0), uint64(0), uint64(0), "initialized"}, + {int64(2), int64(0), int64(0), uint64(0), uint64(0), uint64(0), "initialized"}, + } + nextID := int64(1) + updateArgs := []driver.Value{int64(0), int64(10), "restore", int64(1), int64(1)} + s.prepareMock(rows, &nextID, updateArgs, nil, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(0)) + c.Assert(ck, IsNil) + c.Assert(s.checksumMgr.callCnt, Equals, 0) +} + +func (s *metaMgrSuite) TestAllocTableRowIDsMultiTasksAllocated(c *C) { + ctx := context.WithValue(context.Background(), &checksumManagerKey, s.checksumMgr) + + rows := [][]driver.Value{ + {int64(1), int64(0), int64(0), uint64(0), uint64(0), uint64(0), metaStatusInitial.String()}, + {int64(2), int64(0), int64(100), uint64(0), uint64(0), uint64(0), metaStatusRowIDAllocated.String()}, + } + updateArgs := []driver.Value{int64(100), int64(110), "restore", int64(1), int64(1)} + s.prepareMock(rows, nil, updateArgs, nil, nil) + + ck, rowIDBase, err := s.mgr.AllocTableRowIDs(ctx, 10) + c.Assert(err, IsNil) + c.Assert(rowIDBase, Equals, int64(100)) + c.Assert(ck, IsNil) + c.Assert(s.checksumMgr.callCnt, Equals, 0) +} + +func (s *metaMgrSuite) prepareMock(rowsVal [][]driver.Value, nextRowID *int64, updateArgs []driver.Value, checksum *verification.KVChecksum, updateStatus *string) { + s.mockDB.ExpectExec("SET SESSION tidb_txn_mode = 'pessimistic';"). + WillReturnResult(sqlmock.NewResult(int64(0), int64(0))) + + s.mockDB.ExpectBegin() + + rows := sqlmock.NewRows([]string{"task_id", "row_id_base", "row_id_max", "total_kvs_base", "total_bytes_base", "checksum_base", "status"}) + for _, r := range rowsVal { + rows = rows.AddRow(r...) + } + s.mockDB.ExpectQuery("\\QSELECT task_id, row_id_base, row_id_max, total_kvs_base, total_bytes_base, checksum_base, status from `test`.`table_meta` WHERE table_id = ? FOR UPDATE\\E"). + WithArgs(int64(1)). 
+ WillReturnRows(rows) + if nextRowID != nil { + s.mockDB.ExpectQuery("SHOW TABLE `test`.`t1` NEXT_ROW_ID"). + WillReturnRows(sqlmock.NewRows([]string{"DB_NAME", "TABLE_NAME", "COLUMN_NAME", "NEXT_GLOBAL_ROW_ID", "ID_TYPE"}). + AddRow("test", "t1", "_tidb_rowid", *nextRowID, "AUTO_INCREMENT")) + } + + if len(updateArgs) > 0 { + s.mockDB.ExpectExec("\\Qupdate `test`.`table_meta` set row_id_base = ?, row_id_max = ?, status = ? where table_id = ? and task_id = ?\\E"). + WithArgs(updateArgs...). + WillReturnResult(sqlmock.NewResult(int64(0), int64(1))) + } + + s.mockDB.ExpectCommit() + + if checksum != nil { + s.mockDB.ExpectExec("\\Qupdate `test`.`table_meta` set total_kvs_base = ?, total_bytes_base = ?, checksum_base = ?, status = ? where table_id = ? and task_id = ?\\E"). + WithArgs(checksum.SumKVS(), checksum.SumSize(), checksum.Sum(), metaStatusRestoreStarted.String(), int64(1), int64(1)). + WillReturnResult(sqlmock.NewResult(int64(0), int64(1))) + s.checksumMgr.checksum = RemoteChecksum{ + TotalBytes: checksum.SumSize(), + TotalKVs: checksum.SumKVS(), + Checksum: checksum.Sum(), + } + } + + if updateStatus != nil { + s.mockDB.ExpectExec("\\Qupdate `test`.`table_meta` set status = ? where table_id = ? and task_id = ?\\E"). + WithArgs(*updateStatus, int64(1), int64(1)). + WillReturnResult(sqlmock.NewResult(int64(0), int64(1))) + } +} diff --git a/pkg/lightning/restore/restore.go b/pkg/lightning/restore/restore.go index c4708372e..9f5d79ab0 100644 --- a/pkg/lightning/restore/restore.go +++ b/pkg/lightning/restore/restore.go @@ -59,6 +59,7 @@ import ( "github.com/pingcap/br/pkg/pdutil" "github.com/pingcap/br/pkg/storage" "github.com/pingcap/br/pkg/utils" + "github.com/pingcap/br/pkg/version" "github.com/pingcap/br/pkg/version/build" ) @@ -81,6 +82,32 @@ const ( ) const ( + taskMetaTableName = "task_meta" + tableMetaTableName = "table_meta" + // CreateTableMetadataTable stores the per-table sub jobs information used by TiDB Lightning + CreateTableMetadataTable = `CREATE TABLE IF NOT EXISTS %s ( + task_id BIGINT(20) UNSIGNED, + table_id BIGINT(64) NOT NULL, + table_name VARCHAR(64) NOT NULL, + row_id_base BIGINT(20) NOT NULL DEFAULT 0, + row_id_max BIGINT(20) NOT NULL DEFAULT 0, + total_kvs_base BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + total_bytes_base BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + checksum_base BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + total_kvs BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + total_bytes BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + checksum BIGINT(20) UNSIGNED NOT NULL DEFAULT 0, + status VARCHAR(32) NOT NULL, + PRIMARY KEY (table_id, task_id) + );` + // CreateTaskMetaTable stores the pre-lightning metadata used by TiDB Lightning + CreateTaskMetaTable = `CREATE TABLE IF NOT EXISTS %s ( + task_id BIGINT(20) UNSIGNED NOT NULL, + pd_cfgs VARCHAR(2048) NOT NULL DEFAULT '', + status VARCHAR(32) NOT NULL, + PRIMARY KEY (task_id) + );` + compactionLowerThreshold = 512 * units.MiB compactionUpperThreshold = 32 * units.GiB ) @@ -173,6 +200,7 @@ type Controller struct { closedEngineLimit *worker.Pool store storage.ExternalStorage + metaMgrBuilder metaMgrBuilder diskQuotaLock sync.RWMutex diskQuotaState int32 @@ -217,6 +245,10 @@ func NewRestoreControllerWithPauser( if err := verifyCheckpoint(cfg, taskCp); err != nil { return nil, errors.Trace(err) } + // reuse task id to reuse task meta correctly. 
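+ // A task resumed from an existing checkpoint keeps the task id recorded there, so the rows it
+ // wrote into task_meta and table_meta on the previous run are matched again instead of a fresh
+ // set being created under a newly generated id.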
+ if taskCp != nil { + cfg.TaskID = taskCp.TaskID + } var backend backend.Backend switch cfg.TikvImporter.Backend { @@ -233,12 +265,12 @@ func NewRestoreControllerWithPauser( } backend = tidb.NewTiDBBackend(db, cfg.TikvImporter.OnDuplicate) case config.BackendLocal: - var rLimit uint64 + var rLimit local.Rlim_t rLimit, err = local.GetSystemRLimit() if err != nil { return nil, err } - maxOpenFiles := int(rLimit / uint64(cfg.App.TableConcurrency)) + maxOpenFiles := int(rLimit / local.Rlim_t(cfg.App.TableConcurrency)) // check overflow if maxOpenFiles < 0 { maxOpenFiles = math.MaxInt32 @@ -272,6 +304,23 @@ func NewRestoreControllerWithPauser( ts = oracle.ComposeTS(physical, logical) } + var metaBuilder metaMgrBuilder + switch cfg.TikvImporter.Backend { + case config.BackendLocal, config.BackendImporter: + // TODO: support Lightning via SQL + db, err := g.GetDB() + if err != nil { + return nil, errors.Trace(err) + } + metaBuilder = &dbMetaMgrBuilder{ + db: db, + taskID: cfg.TaskID, + schema: cfg.App.MetaSchemaName, + } + default: + metaBuilder = noopMetaMgrBuilder{} + } + rc := &Controller{ cfg: cfg, dbMetas: dbMetas, @@ -291,8 +340,9 @@ func NewRestoreControllerWithPauser( saveCpCh: make(chan saveCp), closedEngineLimit: worker.NewPool(ctx, cfg.App.TableConcurrency*2, "closed-engine"), - store: s, - ts: ts, + store: s, + ts: ts, + metaMgrBuilder: metaBuilder, } return rc, nil @@ -639,31 +689,6 @@ func (rc *Controller) restoreSchema(ctx context.Context) error { } rc.dbInfos = dbInfos - if rc.cfg.TikvImporter.Backend != config.BackendTiDB { - for _, dbMeta := range rc.dbMetas { - for _, tableMeta := range dbMeta.Tables { - tableName := common.UniqueTable(dbMeta.Name, tableMeta.Name) - - // if checkpoint enable and not missing, we skip the check table empty progress. - if rc.cfg.Checkpoint.Enable { - _, err := rc.checkpointsDB.Get(ctx, tableName) - switch { - case err == nil: - continue - case errors.IsNotFound(err): - default: - return err - } - } - - err := rc.checkTableEmpty(ctx, tableName) - if err != nil { - return err - } - } - } - } - // Load new checkpoints err = rc.checkpointsDB.Initialize(ctx, rc.cfg, dbInfos) if err != nil { @@ -925,144 +950,173 @@ func (rc *Controller) listenCheckpointUpdates() { rc.checkpointsWg.Done() } -func (rc *Controller) runPeriodicActions(ctx context.Context, stop <-chan struct{}) { +// buildRunPeriodicActionAndCancelFunc build the runPeriodicAction func and a cancel func +func (rc *Controller) buildRunPeriodicActionAndCancelFunc(ctx context.Context, stop <-chan struct{}) (func(), func(bool)) { + cancelFuncs := make([]func(bool), 0) + closeFuncs := make([]func(), 0) // a nil channel blocks forever. // if the cron duration is zero we use the nil channel to skip the action. 
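+ // (a receive from a nil channel never proceeds, so the log-progress, switch-mode and
+ // disk-quota cases in the select loop below are each disabled simply by leaving their channel nil)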
var logProgressChan <-chan time.Time if rc.cfg.Cron.LogProgress.Duration > 0 { logProgressTicker := time.NewTicker(rc.cfg.Cron.LogProgress.Duration) - defer logProgressTicker.Stop() + closeFuncs = append(closeFuncs, func() { + logProgressTicker.Stop() + }) logProgressChan = logProgressTicker.C } glueProgressTicker := time.NewTicker(3 * time.Second) - defer glueProgressTicker.Stop() + closeFuncs = append(closeFuncs, func() { + glueProgressTicker.Stop() + }) var switchModeChan <-chan time.Time // tidb backend don't need to switch tikv to import mode if rc.cfg.TikvImporter.Backend != config.BackendTiDB && rc.cfg.Cron.SwitchMode.Duration > 0 { switchModeTicker := time.NewTicker(rc.cfg.Cron.SwitchMode.Duration) - defer switchModeTicker.Stop() + cancelFuncs = append(cancelFuncs, func(bool) { switchModeTicker.Stop() }) + cancelFuncs = append(cancelFuncs, func(do bool) { + if do { + log.L().Info("switch to normal mode") + if err := rc.switchToNormalMode(ctx); err != nil { + log.L().Warn("switch tikv to normal mode failed", zap.Error(err)) + } + } + }) switchModeChan = switchModeTicker.C - - rc.switchToImportMode(ctx) } var checkQuotaChan <-chan time.Time // only local storage has disk quota concern. if rc.cfg.TikvImporter.Backend == config.BackendLocal && rc.cfg.Cron.CheckDiskQuota.Duration > 0 { checkQuotaTicker := time.NewTicker(rc.cfg.Cron.CheckDiskQuota.Duration) - defer checkQuotaTicker.Stop() + cancelFuncs = append(cancelFuncs, func(bool) { checkQuotaTicker.Stop() }) checkQuotaChan = checkQuotaTicker.C } - start := time.Now() - for { - select { - case <-ctx.Done(): - log.L().Warn("stopping periodic actions", log.ShortError(ctx.Err())) - return - case <-stop: - log.L().Info("everything imported, stopping periodic actions") - return - - case <-switchModeChan: - // periodically switch to import mode, as requested by TiKV 3.0 - rc.switchToImportMode(ctx) - - case <-logProgressChan: - // log the current progress periodically, so OPS will know that we're still working - nanoseconds := float64(time.Since(start).Nanoseconds()) - // the estimated chunk is not accurate(likely under estimated), but the actual count is not accurate - // before the last table start, so use the bigger of the two should be a workaround - estimated := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateEstimated)) - pending := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStatePending)) - if estimated < pending { - estimated = pending - } - finished := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished)) - totalTables := metric.ReadCounter(metric.TableCounter.WithLabelValues(metric.TableStatePending, metric.TableResultSuccess)) - completedTables := metric.ReadCounter(metric.TableCounter.WithLabelValues(metric.TableStateCompleted, metric.TableResultSuccess)) - bytesRead := metric.ReadHistogramSum(metric.RowReadBytesHistogram) - engineEstimated := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.ChunkStateEstimated, metric.TableResultSuccess)) - enginePending := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.ChunkStatePending, metric.TableResultSuccess)) - if engineEstimated < enginePending { - engineEstimated = enginePending - } - engineFinished := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.TableStateImported, metric.TableResultSuccess)) - bytesWritten := metric.ReadCounter(metric.BytesCounter.WithLabelValues(metric.TableStateWritten)) - bytesImported := 
metric.ReadCounter(metric.BytesCounter.WithLabelValues(metric.TableStateImported)) - - var state string - var remaining zap.Field - switch { - case finished >= estimated: - if engineFinished < engineEstimated { - state = "importing" - } else { - state = "post-processing" - } - case finished > 0: - state = "writing" - default: - state = "preparing" - } - - // since we can't accurately estimate the extra time cost by import after all writing are finished, - // so here we use estimatedWritingProgress * 0.8 + estimatedImportingProgress * 0.2 as the total - // progress. - remaining = zap.Skip() - totalPercent := 0.0 - if finished > 0 { - writePercent := math.Min(finished/estimated, 1.0) - importPercent := 1.0 - if bytesWritten > 0 { - totalBytes := bytesWritten / writePercent - importPercent = math.Min(bytesImported/totalBytes, 1.0) - } - totalPercent = writePercent*0.8 + importPercent*0.2 - if totalPercent < 1.0 { - remainNanoseconds := (1.0 - totalPercent) / totalPercent * nanoseconds - remaining = zap.Duration("remaining", time.Duration(remainNanoseconds).Round(time.Second)) + return func() { + defer func() { + for _, f := range closeFuncs { + f() } + }() + // tidb backend don't need to switch tikv to import mode + if rc.cfg.TikvImporter.Backend != config.BackendTiDB && rc.cfg.Cron.SwitchMode.Duration > 0 { + rc.switchToImportMode(ctx) } + start := time.Now() + for { + select { + case <-ctx.Done(): + log.L().Warn("stopping periodic actions", log.ShortError(ctx.Err())) + return + case <-stop: + log.L().Info("everything imported, stopping periodic actions") + return - formatPercent := func(finish, estimate float64) string { - speed := "" - if estimated > 0 { - speed = fmt.Sprintf(" (%.1f%%)", finish/estimate*100) - } - return speed - } + case <-switchModeChan: + // periodically switch to import mode, as requested by TiKV 3.0 + rc.switchToImportMode(ctx) + + case <-logProgressChan: + // log the current progress periodically, so OPS will know that we're still working + nanoseconds := float64(time.Since(start).Nanoseconds()) + // the estimated chunk is not accurate(likely under estimated), but the actual count is not accurate + // before the last table start, so use the bigger of the two should be a workaround + estimated := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateEstimated)) + pending := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStatePending)) + if estimated < pending { + estimated = pending + } + finished := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished)) + totalTables := metric.ReadCounter(metric.TableCounter.WithLabelValues(metric.TableStatePending, metric.TableResultSuccess)) + completedTables := metric.ReadCounter(metric.TableCounter.WithLabelValues(metric.TableStateCompleted, metric.TableResultSuccess)) + bytesRead := metric.ReadHistogramSum(metric.RowReadBytesHistogram) + engineEstimated := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.ChunkStateEstimated, metric.TableResultSuccess)) + enginePending := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.ChunkStatePending, metric.TableResultSuccess)) + if engineEstimated < enginePending { + engineEstimated = enginePending + } + engineFinished := metric.ReadCounter(metric.ProcessedEngineCounter.WithLabelValues(metric.TableStateImported, metric.TableResultSuccess)) + bytesWritten := metric.ReadCounter(metric.BytesCounter.WithLabelValues(metric.TableStateWritten)) + bytesImported := 
metric.ReadCounter(metric.BytesCounter.WithLabelValues(metric.TableStateImported)) - // avoid output bytes speed if there are no unfinished chunks - chunkSpeed := zap.Skip() - if bytesRead > 0 { - chunkSpeed = zap.Float64("speed(MiB/s)", bytesRead/(1048576e-9*nanoseconds)) - } + var state string + var remaining zap.Field + switch { + case finished >= estimated: + if engineFinished < engineEstimated { + state = "importing" + } else { + state = "post-processing" + } + case finished > 0: + state = "writing" + default: + state = "preparing" + } - // Note: a speed of 28 MiB/s roughly corresponds to 100 GiB/hour. - log.L().Info("progress", - zap.String("total", fmt.Sprintf("%.1f%%", totalPercent*100)), - // zap.String("files", fmt.Sprintf("%.0f/%.0f (%.1f%%)", finished, estimated, finished/estimated*100)), - zap.String("tables", fmt.Sprintf("%.0f/%.0f%s", completedTables, totalTables, formatPercent(completedTables, totalTables))), - zap.String("chunks", fmt.Sprintf("%.0f/%.0f%s", finished, estimated, formatPercent(finished, estimated))), - zap.String("engines", fmt.Sprintf("%.f/%.f%s", engineFinished, engineEstimated, formatPercent(engineFinished, engineEstimated))), - chunkSpeed, - zap.String("state", state), - remaining, - ) + // since we can't accurately estimate the extra time cost by import after all writing are finished, + // so here we use estimatedWritingProgress * 0.8 + estimatedImportingProgress * 0.2 as the total + // progress. + remaining = zap.Skip() + totalPercent := 0.0 + if finished > 0 { + writePercent := math.Min(finished/estimated, 1.0) + importPercent := 1.0 + if bytesWritten > 0 { + totalBytes := bytesWritten / writePercent + importPercent = math.Min(bytesImported/totalBytes, 1.0) + } + totalPercent = writePercent*0.8 + importPercent*0.2 + if totalPercent < 1.0 { + remainNanoseconds := (1.0 - totalPercent) / totalPercent * nanoseconds + remaining = zap.Duration("remaining", time.Duration(remainNanoseconds).Round(time.Second)) + } + } - case <-checkQuotaChan: - // verify the total space occupied by sorted-kv-dir is below the quota, - // otherwise we perform an emergency import. - rc.enforceDiskQuota(ctx) + formatPercent := func(finish, estimate float64) string { + speed := "" + if estimated > 0 { + speed = fmt.Sprintf(" (%.1f%%)", finish/estimate*100) + } + return speed + } + + // avoid output bytes speed if there are no unfinished chunks + chunkSpeed := zap.Skip() + if bytesRead > 0 { + chunkSpeed = zap.Float64("speed(MiB/s)", bytesRead/(1048576e-9*nanoseconds)) + } - case <-glueProgressTicker.C: - finished := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished)) - rc.tidbGlue.Record(glue.RecordFinishedChunk, uint64(finished)) + // Note: a speed of 28 MiB/s roughly corresponds to 100 GiB/hour. 
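+ // The "total" field below is the 0.8/0.2 weighted write/import estimate computed above;
+ // "remaining" is extrapolated from the elapsed time and is omitted until some chunks have finished.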
+ log.L().Info("progress", + zap.String("total", fmt.Sprintf("%.1f%%", totalPercent*100)), + // zap.String("files", fmt.Sprintf("%.0f/%.0f (%.1f%%)", finished, estimated, finished/estimated*100)), + zap.String("tables", fmt.Sprintf("%.0f/%.0f%s", completedTables, totalTables, formatPercent(completedTables, totalTables))), + zap.String("chunks", fmt.Sprintf("%.0f/%.0f%s", finished, estimated, formatPercent(finished, estimated))), + zap.String("engines", fmt.Sprintf("%.f/%.f%s", engineFinished, engineEstimated, formatPercent(engineFinished, engineEstimated))), + chunkSpeed, + zap.String("state", state), + remaining, + ) + + case <-checkQuotaChan: + // verify the total space occupied by sorted-kv-dir is below the quota, + // otherwise we perform an emergency import. + rc.enforceDiskQuota(ctx) + + case <-glueProgressTicker.C: + finished := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished)) + rc.tidbGlue.Record(glue.RecordFinishedChunk, uint64(finished)) + } + } + }, func(do bool) { + log.L().Info("cancel periodic actions", zap.Bool("do", do)) + for _, f := range cancelFuncs { + f(do) + } } - } } var checksumManagerKey struct{} @@ -1070,10 +1124,19 @@ var checksumManagerKey struct{} func (rc *Controller) restoreTables(ctx context.Context) error { logTask := log.L().Begin(zap.InfoLevel, "restore all tables data") + if err := rc.metaMgrBuilder.Init(ctx); err != nil { + return err + } + // for local backend, we should disable some pd scheduler and change some settings, to // make split region and ingest sst more stable // because importer backend is mostly use for v3.x cluster which doesn't support these api, // so we also don't do this for import backend + finishSchedulers := func() {} + // if one lightning failed abnormally, and can't determine whether it needs to switch back, + // we do not do switch back automatically + cleanupFunc := func() {} + switchBack := false if rc.cfg.TikvImporter.Backend == config.BackendLocal { // disable some pd schedulers pdController, err := pdutil.NewPdController(ctx, rc.cfg.TiDB.PdAddr, @@ -1081,20 +1144,56 @@ func (rc *Controller) restoreTables(ctx context.Context) error { if err != nil { return errors.Trace(err) } + + mgr := rc.metaMgrBuilder.TaskMetaMgr(pdController) + if err = mgr.InitTask(ctx); err != nil { + return err + } + logTask.Info("removing PD leader®ion schedulers") - restoreFn, e := pdController.RemoveSchedulers(ctx) - defer func() { - // use context.Background to make sure this restore function can still be executed even if ctx is canceled - if restoreE := restoreFn(context.Background()); restoreE != nil { - logTask.Warn("failed to restore removed schedulers, you may need to restore them manually", zap.Error(restoreE)) - return + + restoreFn, err := mgr.CheckAndPausePdSchedulers(ctx) + finishSchedulers = func() { + if restoreFn != nil { + // use context.Background to make sure this restore function can still be executed even if ctx is canceled + restoreCtx := context.Background() + needSwitchBack, err := mgr.CheckAndFinishRestore(restoreCtx) + if err != nil { + logTask.Warn("check restore pd schedulers failed", zap.Error(err)) + return + } + switchBack = needSwitchBack + if needSwitchBack { + if restoreE := restoreFn(restoreCtx); restoreE != nil { + logTask.Warn("failed to restore removed schedulers, you may need to restore them manually", zap.Error(restoreE)) + } + // clean up task metas + if cleanupErr := mgr.Cleanup(restoreCtx); cleanupErr != nil { + logTask.Warn("failed to clean task metas, you may need to 
restore them manually", zap.Error(cleanupErr)) + } + // cleanup table meta and schema db if needed. + cleanupFunc = func() { + if e := mgr.CleanupAllMetas(restoreCtx); err != nil { + logTask.Warn("failed to clean table task metas, you may need to restore them manually", zap.Error(e)) + } + } + } + + logTask.Info("add back PD leader®ion schedulers") } - logTask.Info("add back PD leader®ion schedulers") - }() - if e != nil { + + pdController.Close() + } + + if err != nil { return errors.Trace(err) } } + defer func() { + if switchBack { + cleanupFunc() + } + }() type task struct { tr *TableRestore @@ -1111,7 +1210,18 @@ func (rc *Controller) restoreTables(ctx context.Context) error { var restoreErr common.OnceError stopPeriodicActions := make(chan struct{}) - go rc.runPeriodicActions(ctx, stopPeriodicActions) + + periodicActions, cancelFunc := rc.buildRunPeriodicActionAndCancelFunc(ctx, stopPeriodicActions) + go periodicActions() + finishFuncCalled := false + defer func() { + if !finishFuncCalled { + finishSchedulers() + cancelFunc(switchBack) + finishFuncCalled = true + } + }() + defer close(stopPeriodicActions) taskCh := make(chan task, rc.cfg.App.IndexConcurrency) @@ -1258,17 +1368,24 @@ func (rc *Controller) restoreTables(ctx context.Context) error { default: } + // stop periodic tasks for restore table such as pd schedulers and switch-mode tasks. + // this can help make cluster switching back to normal state more quickly. + // finishSchedulers() + // cancelFunc(switchBack) + // finishFuncCalled = true + close(postProcessTaskChan) // otherwise, we should run all tasks in the post-process task chan for i := 0; i < rc.cfg.App.TableConcurrency; i++ { wg.Add(1) go func() { + defer wg.Done() for task := range postProcessTaskChan { + metaMgr := rc.metaMgrBuilder.TableMetaMgr(task.tr) // force all the remain post-process tasks to be executed - _, err := task.tr.postProcess(ctx2, rc, task.cp, true) + _, err = task.tr.postProcess(ctx2, rc, task.cp, true, metaMgr) restoreErr.Set(err) } - wg.Done() }() } wg.Wait() @@ -1291,6 +1408,7 @@ func (tr *TableRestore) restoreTable( default: } + metaMgr := rc.metaMgrBuilder.TableMetaMgr(tr) // no need to do anything if the chunks are already populated if len(cp.Engines) > 0 { tr.logger.Info("reusing engines and files info from checkpoint", @@ -1298,9 +1416,55 @@ func (tr *TableRestore) restoreTable( zap.Int("filesCnt", cp.CountChunks()), ) } else if cp.Status < checkpoints.CheckpointStatusAllWritten { + versionStr, err := rc.tidbGlue.GetSQLExecutor().ObtainStringWithLog( + ctx, "SELECT version()", "fetch tidb version", log.L()) + if err != nil { + return false, errors.Trace(err) + } + + tidbVersion, err := version.ExtractTiDBVersion(versionStr) + if err != nil { + return false, errors.Trace(err) + } + if err := tr.populateChunks(ctx, rc, cp); err != nil { return false, errors.Trace(err) } + + // fetch the max chunk row_id max value as the global max row_id + rowIDMax := int64(0) + for _, engine := range cp.Engines { + if len(engine.Chunks) > 0 && engine.Chunks[len(engine.Chunks)-1].Chunk.RowIDMax > rowIDMax { + rowIDMax = engine.Chunks[len(engine.Chunks)-1].Chunk.RowIDMax + } + } + + // "show table next_row_id" is only available after v4.0.0 + if tidbVersion.Major >= 4 && (rc.cfg.TikvImporter.Backend == config.BackendLocal || rc.cfg.TikvImporter.Backend == config.BackendImporter) { + // first, insert a new-line into meta table + if err = metaMgr.InitTableMeta(ctx); err != nil { + return false, err + } + + checksum, rowIDBase, err := 
metaMgr.AllocTableRowIDs(ctx, rowIDMax) + if err != nil { + return false, err + } + tr.RebaseChunkRowIDs(cp, rowIDBase) + + if checksum != nil { + if cp.Checksum != *checksum { + cp.Checksum = *checksum + rc.saveCpCh <- saveCp{ + tableName: tr.tableName, + merger: &checkpoints.TableChecksumMerger{ + Checksum: cp.Checksum, + }, + } + } + tr.logger.Info("checksum before restore table", zap.Object("checksum", &cp.Checksum)) + } + } if err := rc.checkpointsDB.InsertEngineCheckpoints(ctx, tr.tableName, cp.Engines); err != nil { return false, errors.Trace(err) } @@ -1332,8 +1496,13 @@ func (tr *TableRestore) restoreTable( return false, errors.Trace(err) } + err = metaMgr.UpdateTableStatus(ctx, metaStatusRestoreFinished) + if err != nil { + return false, errors.Trace(err) + } + // 3. Post-process. With the last parameter set to false, we can allow delay analyze execute latter - return tr.postProcess(ctx, rc, cp, false /* force-analyze */) + return tr.postProcess(ctx, rc, cp, false /* force-analyze */, metaMgr) } // estimate SST files compression threshold by total row file size @@ -1489,10 +1658,6 @@ func (tr *TableRestore) restoreEngines(pCtx context.Context, rc *Controller, cp return } - failpoint.Inject("FailBeforeDataEngineImported", func() { - panic("forcing failure due to FailBeforeDataEngineImported") - }) - dataWorker := rc.closedEngineLimit.Apply() defer rc.closedEngineLimit.Recycle(dataWorker) if err := tr.importEngine(ctx, dataClosedEngine, rc, eid, ecp); err != nil { @@ -1633,8 +1798,8 @@ func (tr *TableRestore) restoreEngine( checkFlushLock.Unlock() select { - case <-ctx.Done(): - return nil, ctx.Err() + case <-pCtx.Done(): + return nil, pCtx.Err() default: } @@ -1821,6 +1986,7 @@ func (tr *TableRestore) postProcess( rc *Controller, cp *checkpoints.TableCheckpoint, forcePostProcess bool, + metaMgr tableMetaMgr, ) (bool, error) { // there are no data in this table, no need to do post process // this is important for tables that are just the dump table of views @@ -1874,8 +2040,21 @@ func (tr *TableRestore) postProcess( } else { if forcePostProcess || !rc.cfg.PostRestore.PostProcessAtLast { tr.logger.Info("local checksum", zap.Object("checksum", &localChecksum)) - err := tr.compareChecksum(ctx, localChecksum) + needChecksum, baseTotalChecksum, err := metaMgr.CheckAndUpdateLocalChecksum(ctx, &localChecksum) + if err != nil { + return false, err + } + if !needChecksum { + return false, nil + } + if cp.Checksum.SumKVS() > 0 || baseTotalChecksum.SumKVS() > 0 { + localChecksum.Add(&cp.Checksum) + localChecksum.Add(baseTotalChecksum) + tr.logger.Info("merged local checksum", zap.Object("checksum", &localChecksum)) + } + + err = tr.compareChecksum(ctx, localChecksum) // with post restore level 'optional', we will skip checksum error if rc.cfg.PostRestore.Checksum == config.OpLevelOptional { if err != nil { @@ -1883,10 +2062,15 @@ func (tr *TableRestore) postProcess( err = nil } } + if err == nil { + err = metaMgr.FinishTable(ctx) + } + rc.saveStatusCheckpoint(tr.tableName, checkpoints.WholeTableEngineID, err, checkpoints.CheckpointStatusChecksummed) if err != nil { return false, errors.Trace(err) } + cp.Status = checkpoints.CheckpointStatusChecksummed } else { finished = false @@ -2093,6 +2277,7 @@ func (rc *Controller) setGlobalVariables(ctx context.Context) error { // we should enable/disable new collation here since in server mode, tidb config // may be different in different tasks collate.SetNewCollationEnabledForTest(enabled) + return nil } @@ -2277,6 +2462,18 @@ func (tr *TableRestore) 
populateChunks(ctx context.Context, rc *Controller, cp * return err } +func (t *TableRestore) RebaseChunkRowIDs(cp *checkpoints.TableCheckpoint, rowIDBase int64) { + if rowIDBase == 0 { + return + } + for _, engine := range cp.Engines { + for _, chunk := range engine.Chunks { + chunk.Chunk.PrevRowIDMax += rowIDBase + chunk.Chunk.RowIDMax += rowIDBase + } + } +} + // initializeColumns computes the "column permutation" for an INSERT INTO // statement. Suppose a table has columns (a, b, c, d) in canonical order, and // we execute `INSERT INTO (d, b, a) VALUES ...`, we will need to remap the @@ -2674,6 +2871,7 @@ func (cr *chunkRestore) encodeLoop( canDeliver := false kvPacket := make([]deliveredKVs, 0, maxKvPairsCnt) var newOffset, rowID int64 + var kvSize uint64 outLoop: for !canDeliver { readDurStart := time.Now() @@ -2709,8 +2907,16 @@ func (cr *chunkRestore) encodeLoop( return } kvPacket = append(kvPacket, deliveredKVs{kvs: kvs, columns: columnNames, offset: newOffset, rowID: rowID}) - if len(kvPacket) >= maxKvPairsCnt || newOffset == cr.chunk.Chunk.EndOffset { + kvSize += kvs.Size() + failpoint.Inject("mock-kv-size", func(val failpoint.Value) { + kvSize += uint64(val.(int)) + }) + // pebble cannot allow > 4.0G kv in one batch. + // we will meet pebble panic when import sql file and each kv has the size larger than 4G / maxKvPairsCnt. + // so add this check. + if kvSize >= minDeliverBytes || len(kvPacket) >= maxKvPairsCnt || newOffset == cr.chunk.Chunk.EndOffset { canDeliver = true + kvSize = 0 } } encodeTotalDur += encodeDur diff --git a/pkg/lightning/restore/restore_test.go b/pkg/lightning/restore/restore_test.go index 620ebef88..715da3ce3 100644 --- a/pkg/lightning/restore/restore_test.go +++ b/pkg/lightning/restore/restore_test.go @@ -863,6 +863,7 @@ func (s *tableRestoreSuite) TestTableRestoreMetrics(c *C) { c.Assert(err, IsNil) cpDB := checkpoints.NewNullCheckpointsDB() + g := mock.NewMockGlue(controller) rc := &Controller{ cfg: cfg, dbMetas: []*mydump.MDDatabaseMeta{ @@ -882,17 +883,22 @@ func (s *tableRestoreSuite) TestTableRestoreMetrics(c *C) { saveCpCh: chptCh, pauser: DeliverPauser, backend: noop.NewNoopBackend(), - tidbGlue: mock.NewMockGlue(controller), + tidbGlue: g, errorSummaries: makeErrorSummaries(log.L()), tls: tls, checkpointsDB: cpDB, closedEngineLimit: worker.NewPool(ctx, 1, "closed_engine"), store: s.store, + metaMgrBuilder: noopMetaMgrBuilder{}, } go func() { for range chptCh { } }() + exec := mock.NewMockSQLExecutor(controller) + g.EXPECT().GetSQLExecutor().Return(exec).AnyTimes() + exec.EXPECT().ObtainStringWithLog(gomock.Any(), "SELECT version()", gomock.Any(), gomock.Any()). 
+ Return("5.7.25-TiDB-v5.0.1", nil).AnyTimes() web.BroadcastInitProgress(rc.dbMetas) @@ -1151,6 +1157,57 @@ func (s *chunkRestoreSuite) TestEncodeLoopForcedError(c *C) { c.Assert(kvsCh, HasLen, 0) } +func (s *chunkRestoreSuite) TestEncodeLoopDeliverLimit(c *C) { + ctx := context.Background() + kvsCh := make(chan []deliveredKVs, 4) + deliverCompleteCh := make(chan deliverResult) + kvEncoder, err := kv.NewTableKVEncoder(s.tr.encTable, &kv.SessionOptions{ + SQLMode: s.cfg.TiDB.SQLMode, + Timestamp: 1234567898, + }) + c.Assert(err, IsNil) + + dir := c.MkDir() + fileName := "db.limit.000.csv" + err = ioutil.WriteFile(filepath.Join(dir, fileName), []byte("1,2,3\r\n4,5,6\r\n7,8,9\r"), 0o644) + c.Assert(err, IsNil) + + store, err := storage.NewLocalStorage(dir) + c.Assert(err, IsNil) + cfg := config.NewConfig() + + reader, err := store.Open(ctx, fileName) + c.Assert(err, IsNil) + w := worker.NewPool(ctx, 1, "io") + p := mydump.NewCSVParser(&cfg.Mydumper.CSV, reader, 111, w, false) + s.cr.parser = p + + rc := &Controller{pauser: DeliverPauser, cfg: cfg} + c.Assert(failpoint.Enable( + "github.com/pingcap/br/pkg/lightning/restore/mock-kv-size", "return(110000000)"), IsNil) + _, _, err = s.cr.encodeLoop(ctx, kvsCh, s.tr, s.tr.logger, kvEncoder, deliverCompleteCh, rc) + + // we have 3 kvs total. after the failpoint injected. + // we will send one kv each time. + count := 0 + for { + kvs, ok := <-kvsCh + if !ok { + break + } + count += 1 + if count <= 3 { + c.Assert(kvs, HasLen, 1) + } + if count == 4 { + // we will send empty kvs before encodeLoop exists + // so, we can receive 4 batch and 1 is empty + c.Assert(kvs, HasLen, 0) + break + } + } +} + func (s *chunkRestoreSuite) TestEncodeLoopDeliverErrored(c *C) { ctx := context.Background() kvsCh := make(chan []deliveredKVs) @@ -1436,3 +1493,13 @@ func (s *restoreSchemaSuite) TestRestoreSchemaContextCancel(c *C) { c.Assert(err, NotNil) c.Assert(err, Equals, childCtx.Err()) } + +type testChecksumMgr struct { + checksum RemoteChecksum + callCnt int +} + +func (t *testChecksumMgr) Checksum(ctx context.Context, tableInfo *checkpoints.TidbTableInfo) (*RemoteChecksum, error) { + t.callCnt++ + return &t.checksum, nil +} diff --git a/pkg/lightning/restore/tidb.go b/pkg/lightning/restore/tidb.go index 312d8b050..86db9cad0 100644 --- a/pkg/lightning/restore/tidb.go +++ b/pkg/lightning/restore/tidb.go @@ -98,6 +98,8 @@ func DBFromConfig(dsn config.DBStore) (*sql.DB, error) { "allow_auto_random_explicit_insert": "1", // allow use _tidb_rowid in sql statement "tidb_opt_write_row_id": "1", + // always set auto-commit to ON + "autocommit": "1", }, } db, err := param.Connect() @@ -255,7 +257,10 @@ func LoadSchemaInfo( } for _, tbl := range schema.Tables { - tblInfo := tableMap[strings.ToLower(tbl.Name)] + tblInfo, ok := tableMap[strings.ToLower(tbl.Name)] + if !ok { + return nil, errors.Errorf("table '%s' schema not found", tbl.Name) + } tableName := tblInfo.Name.String() if tblInfo.State != model.StatePublic { err := errors.Errorf("table [%s.%s] state is not public", schema.Name, tableName) diff --git a/pkg/logutil/logging.go b/pkg/logutil/logging.go index 7e803ba64..ef280a69a 100644 --- a/pkg/logutil/logging.go +++ b/pkg/logutil/logging.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" @@ -160,6 +161,22 @@ func SSTMeta(sstMeta *import_sstpb.SSTMeta) zap.Field { return zap.Object("sstMeta", 
zapSSTMetaMarshaler{sstMeta}) } +type zapSSTMetasMarshaler []*import_sstpb.SSTMeta + +func (m zapSSTMetasMarshaler) MarshalLogArray(encoder zapcore.ArrayEncoder) error { + for _, meta := range m { + if err := encoder.AppendObject(zapSSTMetaMarshaler{meta}); err != nil { + return errors.Trace(err) + } + } + return nil +} + +// SSTMetas make the zap fields for SST metas. +func SSTMetas(sstMetas []*import_sstpb.SSTMeta) zap.Field { + return zap.Array("sstMetas", zapSSTMetasMarshaler(sstMetas)) +} + type zapKeysMarshaler [][]byte func (keys zapKeysMarshaler) MarshalLogObject(encoder zapcore.ObjectEncoder) error { diff --git a/pkg/pdutil/pd.go b/pkg/pdutil/pd.go index 104c6e998..63276804b 100644 --- a/pkg/pdutil/pd.go +++ b/pkg/pdutil/pd.go @@ -75,13 +75,13 @@ func constConfigGeneratorBuilder(val interface{}) pauseConfigGenerator { } } -// clusterConfig represents a set of scheduler whose config have been modified +// ClusterConfig represents a set of scheduler whose config have been modified // along with their original config. -type clusterConfig struct { +type ClusterConfig struct { // Enable PD schedulers before restore - scheduler []string + Schedulers []string `json:"schedulers"` // Original scheudle configuration - scheduleCfg map[string]interface{} + ScheduleCfg map[string]interface{} `json:"schedule_cfg"` } type pauseSchedulerBody struct { @@ -527,14 +527,14 @@ func (p *PdController) doPauseConfigs(ctx context.Context, cfg map[string]interf return p.doUpdatePDScheduleConfig(ctx, cfg, post, prefix) } -func restoreSchedulers(ctx context.Context, pd *PdController, clusterCfg clusterConfig) error { - if err := pd.ResumeSchedulers(ctx, clusterCfg.scheduler); err != nil { +func restoreSchedulers(ctx context.Context, pd *PdController, clusterCfg ClusterConfig) error { + if err := pd.ResumeSchedulers(ctx, clusterCfg.Schedulers); err != nil { return errors.Annotate(err, "fail to add PD schedulers") } - log.Info("restoring config", zap.Any("config", clusterCfg.scheduleCfg)) + log.Info("restoring config", zap.Any("config", clusterCfg.ScheduleCfg)) mergeCfg := make(map[string]interface{}) for cfgKey := range expectPDCfg { - value := clusterCfg.scheduleCfg[cfgKey] + value := clusterCfg.ScheduleCfg[cfgKey] if value == nil { // Ignore non-exist config. continue @@ -554,7 +554,8 @@ func restoreSchedulers(ctx context.Context, pd *PdController, clusterCfg cluster return nil } -func (p *PdController) makeUndoFunctionByConfig(config clusterConfig) UndoFunc { +// MakeUndoFunctionByConfig return an UndoFunc based on specified ClusterConfig +func (p *PdController) MakeUndoFunctionByConfig(config ClusterConfig) UndoFunc { restore := func(ctx context.Context) error { return restoreSchedulers(ctx, p, config) } @@ -563,22 +564,38 @@ func (p *PdController) makeUndoFunctionByConfig(config clusterConfig) UndoFunc { // RemoveSchedulers removes the schedulers that may slow down BR speed. 
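+ // The returned UndoFunc resumes the removed schedulers and restores the saved schedule config,
+ // so callers are expected to invoke it once the restore has finished or failed.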
func (p *PdController) RemoveSchedulers(ctx context.Context) (undo UndoFunc, err error) { + undo = Nop + + origin, _, err1 := p.RemoveSchedulersWithOrigin(ctx) + if err1 != nil { + err = err1 + return + } + + undo = p.MakeUndoFunctionByConfig(ClusterConfig{Schedulers: origin.Schedulers, ScheduleCfg: origin.ScheduleCfg}) + return undo, errors.Trace(err) +} + +// RemoveSchedulersWithOrigin pause and remove br related schedule configs and return the origin and modified configs +func (p *PdController) RemoveSchedulersWithOrigin(ctx context.Context) (ClusterConfig, ClusterConfig, error) { if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { span1 := span.Tracer().StartSpan("PdController.RemoveSchedulers", opentracing.ChildOf(span.Context())) defer span1.Finish() ctx = opentracing.ContextWithSpan(ctx, span1) } - undo = Nop + originCfg := ClusterConfig{} + removedCfg := ClusterConfig{} stores, err := p.pdClient.GetAllStores(ctx) if err != nil { - return + return originCfg, removedCfg, err } scheduleCfg, err := p.GetPDScheduleConfig(ctx) if err != nil { - return + return originCfg, removedCfg, err } - disablePDCfg := make(map[string]interface{}) + disablePDCfg := make(map[string]interface{}, len(expectPDCfg)) + originPDCfg := make(map[string]interface{}, len(expectPDCfg)) for cfgKey, cfgValFunc := range expectPDCfg { value, ok := scheduleCfg[cfgKey] if !ok { @@ -586,14 +603,17 @@ func (p *PdController) RemoveSchedulers(ctx context.Context) (undo UndoFunc, err continue } disablePDCfg[cfgKey] = cfgValFunc(len(stores), value) + originPDCfg[cfgKey] = value } - undo = p.makeUndoFunctionByConfig(clusterConfig{scheduleCfg: scheduleCfg}) + originCfg.ScheduleCfg = originPDCfg + removedCfg.ScheduleCfg = disablePDCfg + log.Debug("saved PD config", zap.Any("config", scheduleCfg)) // Remove default PD scheduler that may affect restore process. existSchedulers, err := p.ListSchedulers(ctx) if err != nil { - return + return originCfg, removedCfg, err } needRemoveSchedulers := make([]string, 0, len(existSchedulers)) for _, s := range existSchedulers { @@ -602,7 +622,30 @@ func (p *PdController) RemoveSchedulers(ctx context.Context) (undo UndoFunc, err } } + removedSchedulers, err := p.doRemoveSchedulersWith(ctx, needRemoveSchedulers, disablePDCfg) + if err != nil { + return originCfg, removedCfg, err + } + + originCfg.Schedulers = removedSchedulers + removedCfg.Schedulers = removedSchedulers + + return originCfg, removedCfg, nil +} + +// RemoveSchedulersWithCfg removes pd schedulers and configs with specified ClusterConfig +func (p *PdController) RemoveSchedulersWithCfg(ctx context.Context, removeCfg ClusterConfig) error { + _, err := p.doRemoveSchedulersWith(ctx, removeCfg.Schedulers, removeCfg.ScheduleCfg) + return err +} + +func (p *PdController) doRemoveSchedulersWith( + ctx context.Context, + needRemoveSchedulers []string, + disablePDCfg map[string]interface{}, +) ([]string, error) { var removedSchedulers []string + var err error if p.isPauseConfigEnabled() { // after 4.0.8 we can set these config with TTL removedSchedulers, err = p.pauseSchedulersAndConfigWith(ctx, needRemoveSchedulers, disablePDCfg, pdRequest) @@ -611,12 +654,11 @@ func (p *PdController) RemoveSchedulers(ctx context.Context) (undo UndoFunc, err // which doesn't have temporary config setting. 
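+ // Without TTL support the schedule config is rewritten persistently here, which is why
+ // RemoveSchedulersWithOrigin records the original values and returns them for an explicit restore later.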
err = p.doUpdatePDScheduleConfig(ctx, disablePDCfg, pdRequest) if err != nil { - return + return nil, err } removedSchedulers, err = p.pauseSchedulersAndConfigWith(ctx, needRemoveSchedulers, nil, pdRequest) } - undo = p.makeUndoFunctionByConfig(clusterConfig{scheduler: removedSchedulers, scheduleCfg: scheduleCfg}) - return undo, errors.Trace(err) + return removedSchedulers, err } // Close close the connection to pd. diff --git a/pkg/restore/client.go b/pkg/restore/client.go index fb45debdc..f2623ea82 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -11,6 +11,7 @@ import ( "fmt" "sort" "strconv" + "strings" "time" "github.com/opentracing/opentracing-go" @@ -168,7 +169,7 @@ func (rc *Client) Close() { } // InitBackupMeta loads schemas from BackupMeta to initialize RestoreClient. -func (rc *Client) InitBackupMeta(backupMeta *backuppb.BackupMeta, backend *backuppb.StorageBackend) error { +func (rc *Client) InitBackupMeta(c context.Context, backupMeta *backuppb.BackupMeta, backend *backuppb.StorageBackend) error { if !backupMeta.IsRawKv { databases, err := utils.LoadBackupTables(backupMeta) if err != nil { @@ -189,8 +190,7 @@ func (rc *Client) InitBackupMeta(backupMeta *backuppb.BackupMeta, backend *backu metaClient := NewSplitClient(rc.pdClient, rc.tlsConf) importCli := NewImportClient(metaClient, rc.tlsConf, rc.keepaliveConf) rc.fileImporter = NewFileImporter(metaClient, importCli, backend, rc.backupMeta.IsRawKv, rc.rateLimit) - - return nil + return rc.fileImporter.CheckMultiIngestSupport(c, rc.pdClient) } // IsRawKvMode checks whether the backup data is in raw kv format, in which case transactional recover is forbidden. @@ -541,6 +541,38 @@ func (rc *Client) setSpeedLimit(ctx context.Context) error { return nil } +// isFilesBelongToSameRange check whether two files are belong to the same range with different cf. +func isFilesBelongToSameRange(f1, f2 string) bool { + // the backup date file pattern is `{store_id}_{region_id}_{epoch_version}_{key}_{ts}_{cf}.sst` + // so we need to compare with out the `_{cf}.sst` suffix + idx1 := strings.LastIndex(f1, "_") + idx2 := strings.LastIndex(f2, "_") + + if idx1 < 0 || idx2 < 0 { + panic(fmt.Sprintf("invalid backup data file name: '%s', '%s'", f1, f2)) + } + + return f1[:idx1] == f2[:idx2] +} + +func drainFilesByRange(files []*backuppb.File, supportMulti bool) ([]*backuppb.File, []*backuppb.File) { + if len(files) == 0 { + return nil, nil + } + if !supportMulti { + return files[:1], files[1:] + } + idx := 1 + for idx < len(files) { + if !isFilesBelongToSameRange(files[idx-1].Name, files[idx].Name) { + break + } + idx++ + } + + return files[:idx], files[idx:] +} + // RestoreFiles tries to restore the files. 
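+ // Files are drained range by range through drainFilesByRange, so SSTs that belong to the same
+ // data range (differing only in column family) are handed to the importer in one batch,
+ // e.g. "1_2_3_k_4_write.sst" and "1_2_3_k_4_default.sst" travel together while a file from
+ // another range starts the next batch; without MultiIngest support every batch holds a single file.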
func (rc *Client) RestoreFiles( ctx context.Context, @@ -571,19 +603,21 @@ func (rc *Client) RestoreFiles( return errors.Trace(err) } - for _, file := range files { - fileReplica := file + var rangeFiles []*backuppb.File + for rangeFiles, files = drainFilesByRange(files, rc.fileImporter.supportMultiIngest); len(rangeFiles) != 0; rangeFiles, files = drainFilesByRange(files, rc.fileImporter.supportMultiIngest) { + filesReplica := rangeFiles rc.workerPool.ApplyOnErrorGroup(eg, func() error { fileStart := time.Now() defer func() { - log.Info("import file done", logutil.File(fileReplica), + log.Info("import files done", logutil.Files(filesReplica), zap.Duration("take", time.Since(fileStart))) updateCh.Inc() }() - return rc.fileImporter.Import(ectx, fileReplica, rewriteRules) + return rc.fileImporter.Import(ectx, filesReplica, rewriteRules) }) } + if err := eg.Wait(); err != nil { summary.CollectFailureUnit("file", err) log.Error( @@ -621,7 +655,7 @@ func (rc *Client) RestoreRaw( rc.workerPool.ApplyOnErrorGroup(eg, func() error { defer updateCh.Inc() - return rc.fileImporter.Import(ectx, fileReplica, EmptyRewriteRule()) + return rc.fileImporter.Import(ectx, []*backuppb.File{fileReplica}, EmptyRewriteRule()) }) } if err := eg.Wait(); err != nil { diff --git a/pkg/restore/import.go b/pkg/restore/import.go index bc7038901..8e992f6bd 100644 --- a/pkg/restore/import.go +++ b/pkg/restore/import.go @@ -16,14 +16,18 @@ import ( "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/log" + pd "github.com/tikv/pd/client" "github.com/tikv/pd/pkg/codec" "go.uber.org/multierr" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/backoff" + "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/keepalive" + "google.golang.org/grpc/status" + "github.com/pingcap/br/pkg/conn" berrors "github.com/pingcap/br/pkg/errors" "github.com/pingcap/br/pkg/logutil" "github.com/pingcap/br/pkg/summary" @@ -48,6 +52,11 @@ type ImporterClient interface { storeID uint64, req *import_sstpb.IngestRequest, ) (*import_sstpb.IngestResponse, error) + MultiIngest( + ctx context.Context, + storeID uint64, + req *import_sstpb.MultiIngestRequest, + ) (*import_sstpb.IngestResponse, error) SetDownloadSpeedLimit( ctx context.Context, @@ -59,6 +68,8 @@ type ImporterClient interface { ctx context.Context, storeID uint64, ) (import_sstpb.ImportSSTClient, error) + + SupportMultiIngest(ctx context.Context, stores []uint64) (bool, error) } type importClient struct { @@ -116,6 +127,18 @@ func (ic *importClient) IngestSST( return client.Ingest(ctx, req) } +func (ic *importClient) MultiIngest( + ctx context.Context, + storeID uint64, + req *import_sstpb.MultiIngestRequest, +) (*import_sstpb.IngestResponse, error) { + client, err := ic.GetImportClient(ctx, storeID) + if err != nil { + return nil, errors.Trace(err) + } + return client.MultiIngest(ctx, req) +} + func (ic *importClient) GetImportClient( ctx context.Context, storeID uint64, @@ -155,6 +178,21 @@ func (ic *importClient) GetImportClient( return client, errors.Trace(err) } +func (ic *importClient) SupportMultiIngest(ctx context.Context, stores []uint64) (bool, error) { + for _, storeID := range stores { + _, err := ic.MultiIngest(ctx, storeID, &import_sstpb.MultiIngestRequest{}) + if err != nil { + if s, ok := status.FromError(err); ok { + if s.Code() == codes.Unimplemented { + return false, nil + } + } + return false, errors.Trace(err) + } + } + return true, nil +} + // FileImporter used to 
import a file to TiKV. type FileImporter struct { metaClient SplitClient @@ -162,9 +200,10 @@ type FileImporter struct { backend *backuppb.StorageBackend rateLimit uint64 - isRawKvMode bool - rawStartKey []byte - rawEndKey []byte + isRawKvMode bool + rawStartKey []byte + rawEndKey []byte + supportMultiIngest bool } // NewFileImporter returns a new file importClient. @@ -184,6 +223,26 @@ func NewFileImporter( } } +// CheckMultiIngestSupport checks whether all stores support multi-ingest +func (importer *FileImporter) CheckMultiIngestSupport(ctx context.Context, pdClient pd.Client) error { + allStores, err := conn.GetAllTiKVStores(ctx, pdClient, conn.SkipTiFlash) + if err != nil { + return errors.Trace(err) + } + storeIDs := make([]uint64, 0, len(allStores)) + for _, s := range allStores { + storeIDs = append(storeIDs, s.Id) + } + + support, err := importer.importClient.SupportMultiIngest(ctx, storeIDs) + if err != nil { + return errors.Trace(err) + } + importer.supportMultiIngest = support + log.L().Info("multi ingest support", zap.Bool("support", support)) + return nil +} + // SetRawRange sets the range to be restored in raw kv mode. func (importer *FileImporter) SetRawRange(startKey, endKey []byte) error { if !importer.isRawKvMode { @@ -198,28 +257,36 @@ func (importer *FileImporter) SetRawRange(startKey, endKey []byte) error { // All rules must contain encoded keys. func (importer *FileImporter) Import( ctx context.Context, - file *backuppb.File, + files []*backuppb.File, rewriteRules *RewriteRules, ) error { - log.Debug("import file", logutil.File(file)) + log.Debug("import file", logutil.Files(files)) // Rewrite the start key and end key of file to scan regions var startKey, endKey []byte - var err error if importer.isRawKvMode { - startKey = file.StartKey - endKey = file.EndKey + startKey = files[0].StartKey + endKey = files[0].EndKey } else { - startKey, endKey, err = rewriteFileKeys(file, rewriteRules) - } - if err != nil { - return errors.Trace(err) + for _, f := range files { + start, end, err := rewriteFileKeys(f, rewriteRules) + if err != nil { + return errors.Trace(err) + } + if len(startKey) == 0 || bytes.Compare(startKey, start) > 0 { + startKey = start + } + if bytes.Compare(endKey, end) < 0 { + endKey = end + } + } } + log.Debug("rewrite file keys", - logutil.File(file), + logutil.Files(files), logutil.Key("startKey", startKey), logutil.Key("endKey", endKey)) - err = utils.WithRetry(ctx, func() error { + err := utils.WithRetry(ctx, func() error { tctx, cancel := context.WithTimeout(ctx, importScanRegionTime) defer cancel() // Scan regions covered by the file range @@ -229,26 +296,36 @@ func (importer *FileImporter) Import( return errors.Trace(errScanRegion) } - log.Debug("scan regions", logutil.File(file), zap.Int("count", len(regionInfos))) + log.Debug("scan regions", logutil.Files(files), zap.Int("count", len(regionInfos))) // Try to download and ingest the file in every region regionLoop: for _, regionInfo := range regionInfos { info := regionInfo // Try to download file. 
- var downloadMeta *import_sstpb.SSTMeta + downloadMetas := make([]*import_sstpb.SSTMeta, 0, len(files)) + remainFiles := files errDownload := utils.WithRetry(ctx, func() error { var e error - if importer.isRawKvMode { - downloadMeta, e = importer.downloadRawKVSST(ctx, info, file) - } else { - downloadMeta, e = importer.downloadSST(ctx, info, file, rewriteRules) + for i, f := range remainFiles { + var downloadMeta *import_sstpb.SSTMeta + if importer.isRawKvMode { + downloadMeta, e = importer.downloadRawKVSST(ctx, info, f) + } else { + downloadMeta, e = importer.downloadSST(ctx, info, f, rewriteRules) + } + failpoint.Inject("restore-storage-error", func(val failpoint.Value) { + msg := val.(string) + log.Debug("failpoint restore-storage-error injected.", zap.String("msg", msg)) + e = errors.Annotate(e, msg) + }) + if e != nil { + remainFiles = remainFiles[i:] + return errors.Trace(e) + } + downloadMetas = append(downloadMetas, downloadMeta) } - failpoint.Inject("restore-storage-error", func(val failpoint.Value) { - msg := val.(string) - log.Debug("failpoint restore-storage-error injected.", zap.String("msg", msg)) - e = errors.Annotate(e, msg) - }) - return errors.Trace(e) + + return nil }, newDownloadSSTBackoffer()) if errDownload != nil { for _, e := range multierr.Errors(errDownload) { @@ -256,7 +333,7 @@ func (importer *FileImporter) Import( case berrors.ErrKVRewriteRuleNotFound, berrors.ErrKVRangeIsEmpty: // Skip this region log.Warn("download file skipped", - logutil.File(file), + logutil.Files(files), logutil.Region(info.Region), logutil.Key("startKey", startKey), logutil.Key("endKey", endKey), @@ -265,7 +342,7 @@ func (importer *FileImporter) Import( } } log.Error("download file failed", - logutil.File(file), + logutil.Files(files), logutil.Region(info.Region), logutil.Key("startKey", startKey), logutil.Key("endKey", endKey), @@ -273,7 +350,7 @@ func (importer *FileImporter) Import( return errors.Trace(errDownload) } - ingestResp, errIngest := importer.ingestSST(ctx, downloadMeta, info) + ingestResp, errIngest := importer.ingestSSTs(ctx, downloadMetas, info) ingestRetry: for errIngest == nil { errPb := ingestResp.GetError() @@ -312,7 +389,7 @@ func (importer *FileImporter) Import( errIngest = errors.Trace(berrors.ErrKVEpochNotMatch) break ingestRetry } - ingestResp, errIngest = importer.ingestSST(ctx, downloadMeta, newInfo) + ingestResp, errIngest = importer.ingestSSTs(ctx, downloadMetas, newInfo) case errPb.EpochNotMatch != nil: // TODO handle epoch not match error // 1. 
retry download if needed @@ -331,15 +408,18 @@ func (importer *FileImporter) Import( if errIngest != nil { log.Error("ingest file failed", - logutil.File(file), - logutil.SSTMeta(downloadMeta), + logutil.Files(files), + logutil.SSTMetas(downloadMetas), logutil.Region(info.Region), zap.Error(errIngest)) return errors.Trace(errIngest) } } - summary.CollectSuccessUnit(summary.TotalKV, 1, file.TotalKvs) - summary.CollectSuccessUnit(summary.TotalBytes, 1, file.TotalBytes) + for _, f := range files { + summary.CollectSuccessUnit(summary.TotalKV, 1, f.TotalKvs) + summary.CollectSuccessUnit(summary.TotalBytes, 1, f.TotalBytes) + } + return nil }, newImportSSTBackoffer()) return errors.Trace(err) @@ -456,9 +536,9 @@ func (importer *FileImporter) downloadRawKVSST( return &sstMeta, nil } -func (importer *FileImporter) ingestSST( +func (importer *FileImporter) ingestSSTs( ctx context.Context, - sstMeta *import_sstpb.SSTMeta, + sstMetas []*import_sstpb.SSTMeta, regionInfo *RegionInfo, ) (*import_sstpb.IngestResponse, error) { leader := regionInfo.Leader @@ -470,14 +550,26 @@ func (importer *FileImporter) ingestSST( RegionEpoch: regionInfo.Region.GetRegionEpoch(), Peer: leader, } - req := &import_sstpb.IngestRequest{ - Context: reqCtx, - Sst: sstMeta, + + if !importer.supportMultiIngest { + // TODO: not sure we need this check + if len(sstMetas) != 1 { + panic("do not support batch ingest") + } + req := &import_sstpb.IngestRequest{ + Context: reqCtx, + Sst: sstMetas[0], + } + log.Debug("ingest SST", logutil.SSTMeta(sstMetas[0]), logutil.Leader(leader)) + resp, err := importer.importClient.IngestSST(ctx, leader.GetStoreId(), req) + return resp, errors.Trace(err) } - log.Debug("ingest SST", logutil.SSTMeta(sstMeta), logutil.Leader(leader)) - resp, err := importer.importClient.IngestSST(ctx, leader.GetStoreId(), req) - if err != nil { - return nil, errors.Trace(err) + + req := &import_sstpb.MultiIngestRequest{ + Context: reqCtx, + Ssts: sstMetas, } - return resp, nil + log.Debug("ingest SSTs", logutil.SSTMetas(sstMetas), logutil.Leader(leader)) + resp, err := importer.importClient.MultiIngest(ctx, leader.GetStoreId(), req) + return resp, errors.Trace(err) } diff --git a/pkg/restore/systable_restore.go b/pkg/restore/systable_restore.go index 81e19e92e..1e60637b1 100644 --- a/pkg/restore/systable_restore.go +++ b/pkg/restore/systable_restore.go @@ -29,6 +29,17 @@ var statsTables = map[string]struct{}{ "stats_top_n": {}, } +var unRecoverableTable = map[string]struct{}{ + // some variables in tidb (e.g. gc_safe_point) cannot be recovered. 
+ "tidb": {}, + "global_variables": {}, +} + +func isUnrecoverableTable(tableName string) bool { + _, ok := unRecoverableTable[tableName] + return ok +} + func isStatsTable(tableName string) bool { _, ok := statsTables[tableName] return ok @@ -42,7 +53,7 @@ func (rc *Client) RestoreSystemSchemas(ctx context.Context, f filter.Filter) { temporaryDB := utils.TemporaryDBName(sysDB) defer rc.cleanTemporaryDatabase(ctx, sysDB) - if !f.MatchSchema(temporaryDB.O) { + if !f.MatchSchema(sysDB) { log.Debug("system database filtered out", zap.String("database", sysDB)) return } @@ -68,8 +79,8 @@ func (rc *Client) RestoreSystemSchemas(ctx context.Context, f filter.Filter) { zap.Stringer("table", tableName), ) } + tablesRestored = append(tablesRestored, tableName.L) } - tablesRestored = append(tablesRestored, tableName.L) } if err := rc.afterSystemTablesReplaced(ctx, tablesRestored); err != nil { for _, e := range multierr.Errors(err) { @@ -155,6 +166,10 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, tableName "the table ID is out-of-date and may corrupt existing statistics") } + if isUnrecoverableTable(tableName) { + return berrors.ErrUnsupportedSystemTable.GenWithStack("restoring unsupported `mysql` schema table") + } + if db.ExistingTables[tableName] != nil { log.Info("table existing, using replace into for restore", zap.String("table", tableName), diff --git a/pkg/storage/gcs.go b/pkg/storage/gcs.go index 2becf7733..230ab0a81 100644 --- a/pkg/storage/gcs.go +++ b/pkg/storage/gcs.go @@ -111,7 +111,9 @@ func (s *gcsStorage) ReadFile(ctx context.Context, name string) ([]byte, error) object := s.objectName(name) rc, err := s.bucket.Object(object).NewReader(ctx) if err != nil { - return nil, errors.Trace(err) + return nil, errors.Annotatef(err, + "failed to read gcs file, file info: input.bucket='%s', input.key='%s'", + s.gcs.Bucket, object) } defer rc.Close() diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index da8fdf5ee..6a9e03d01 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -137,9 +137,6 @@ func New(ctx context.Context, backend *backuppb.StorageBackend, opts *ExternalSt if backend.Local == nil { return nil, errors.Annotate(berrors.ErrStorageInvalidConfig, "local config not found") } - if opts.SkipCheckPath { - return &LocalStorage{base: backend.Local.Path}, nil - } return NewLocalStorage(backend.Local.Path) case *backuppb.StorageBackend_S3: if backend.S3 == nil { diff --git a/pkg/summary/collector.go b/pkg/summary/collector.go index 3e3264d16..aac2fc1d9 100644 --- a/pkg/summary/collector.go +++ b/pkg/summary/collector.go @@ -23,6 +23,10 @@ const ( TotalKV = "total kv" // TotalBytes is a field we collect during backup/restore TotalBytes = "total bytes" + // BackupDataSize is a field we collect after backup finish + BackupDataSize = "backup data size(after compressed)" + // RestoreDataSize is a field we collection after restore finish + RestoreDataSize = "restore data size(after decompressed)" ) // LogCollector collects infos into summary log. 
@@ -200,10 +204,28 @@ func (tc *logCollector) Summary(name string) { for name, data := range tc.successData { if name == TotalBytes { logFields = append(logFields, - zap.String("data-size", units.HumanSize(float64(data))), + zap.String("total-kv-size", units.HumanSize(float64(data))), zap.String("average-speed", units.HumanSize(float64(data)/totalCost.Seconds())+"/s")) continue } + if name == BackupDataSize { + if tc.failureUnitCount+tc.successUnitCount == 0 { + logFields = append(logFields, zap.String("Result", "Nothing to backup")) + } else { + logFields = append(logFields, + zap.String(BackupDataSize, units.HumanSize(float64(data)))) + } + continue + } + if name == RestoreDataSize { + if tc.failureUnitCount+tc.successUnitCount == 0 { + logFields = append(logFields, zap.String("Result", "Nothing to restore")) + } else { + logFields = append(logFields, + zap.String(RestoreDataSize, units.HumanSize(float64(data)))) + } + continue + } logFields = append(logFields, zap.Uint64(logKeyFor(name), data)) } diff --git a/pkg/task/backup.go b/pkg/task/backup.go index 8b1e1b6ac..9159060ec 100644 --- a/pkg/task/backup.go +++ b/pkg/task/backup.go @@ -19,8 +19,8 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" - "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/statistics/handle" + "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/types" "github.com/spf13/pflag" "go.uber.org/zap" @@ -434,7 +434,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig return errors.Trace(err) } - g.Record("Size", utils.ArchiveSize(&backupMeta)) + g.Record(summary.BackupDataSize, utils.ArchiveSize(&backupMeta)) // Set task summary to success status. summary.SetSuccessStatus(true) @@ -476,7 +476,7 @@ func parseTSString(ts string) (uint64, error) { if err != nil { return 0, errors.Trace(err) } - return variable.GoTimeToTS(t1), nil + return oracle.GoTimeToTS(t1), nil } func parseCompressionType(s string) (backuppb.CompressionType, error) { diff --git a/pkg/task/backup_raw.go b/pkg/task/backup_raw.go index 0756ebc8a..8b982bb09 100644 --- a/pkg/task/backup_raw.go +++ b/pkg/task/backup_raw.go @@ -229,7 +229,7 @@ func RunBackupRaw(c context.Context, g glue.Glue, cmdName string, cfg *RawKvConf return errors.Trace(err) } - g.Record("Size", utils.ArchiveSize(&backupMeta)) + g.Record(summary.BackupDataSize, utils.ArchiveSize(&backupMeta)) // Set task summary to success status. summary.SetSuccessStatus(true) diff --git a/pkg/task/common.go b/pkg/task/common.go index 6d69ca69a..907a64551 100644 --- a/pkg/task/common.go +++ b/pkg/task/common.go @@ -30,6 +30,7 @@ import ( berrors "github.com/pingcap/br/pkg/errors" "github.com/pingcap/br/pkg/glue" "github.com/pingcap/br/pkg/storage" + "github.com/pingcap/br/pkg/utils" ) const ( @@ -139,6 +140,10 @@ type Config struct { TableFilter filter.Filter `json:"-" toml:"-"` SwitchModeInterval time.Duration `json:"switch-mode-interval" toml:"switch-mode-interval"` + // Schemas is a database name set, used to check whether the databases to be restored have been backed up + Schemas map[string]struct{} + // Tables is a table name set, used to check whether the tables to be restored have been backed up + Tables map[string]struct{} // GrpcKeepaliveTime is the interval of pinging the server.
GRPCKeepaliveTime time.Duration `json:"grpc-keepalive-time" toml:"grpc-keepalive-time"` @@ -280,6 +285,8 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error { } cfg.RateLimit = rateLimit * rateLimitUnit + cfg.Schemas = make(map[string]struct{}) + cfg.Tables = make(map[string]struct{}) var caseSensitive bool if filterFlag := flags.Lookup(flagFilter); filterFlag != nil { var f filter.Filter @@ -297,11 +304,13 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error { if len(db) == 0 { return errors.Annotate(berrors.ErrInvalidArgument, "empty database name is not allowed") } + cfg.Schemas[utils.EncloseName(db)] = struct{}{} if tblFlag := flags.Lookup(flagTable); tblFlag != nil { tbl := tblFlag.Value.String() if len(tbl) == 0 { return errors.Annotate(berrors.ErrInvalidArgument, "empty table name is not allowed") } + cfg.Tables[utils.EncloseDBAndTable(db, tbl)] = struct{}{} cfg.TableFilter = filter.NewTablesFilter(filter.Table{ Schema: db, Name: tbl, diff --git a/pkg/task/restore.go b/pkg/task/restore.go index 625c82b8c..91f6ab803 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -150,6 +150,37 @@ func (cfg *RestoreConfig) adjustRestoreConfig() { } } +// CheckRestoreDBAndTable checks whether the databases or tables to be restored have been backed up +func CheckRestoreDBAndTable(client *restore.Client, cfg *RestoreConfig) error { + if len(cfg.Schemas) == 0 && len(cfg.Tables) == 0 { + return nil + } + schemas := client.GetDatabases() + schemasMap := make(map[string]struct{}) + tablesMap := make(map[string]struct{}) + for _, db := range schemas { + schemasMap[utils.EncloseName(db.Info.Name.O)] = struct{}{} + for _, table := range db.Tables { + tablesMap[utils.EncloseDBAndTable(db.Info.Name.O, table.Info.Name.O)] = struct{}{} + } + } + restoreSchemas := cfg.Schemas + restoreTables := cfg.Tables + for schema := range restoreSchemas { + if _, ok := schemasMap[schema]; !ok { + return errors.Annotatef(berrors.ErrUndefinedRestoreDbOrTable, + "[database: %v] has not been backed up, please ensure you have entered the correct database name", schema) + } + } + for table := range restoreTables { + if _, ok := tablesMap[table]; !ok { + return errors.Annotatef(berrors.ErrUndefinedRestoreDbOrTable, + "[table: %v] has not been backed up, please ensure you have entered the correct table name", table) + } + } + return nil +} + // RunRestore starts a restore task inside the current goroutine.
func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConfig) error { cfg.adjustRestoreConfig() @@ -210,7 +241,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err != nil { return errors.Trace(err) } - g.Record("Size", utils.ArchiveSize(backupMeta)) + g.Record(summary.RestoreDataSize, utils.ArchiveSize(backupMeta)) backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) if cfg.CheckRequirements && backupVersion != nil { if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { @@ -218,14 +249,16 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } - if err = client.InitBackupMeta(backupMeta, u); err != nil { + if err = client.InitBackupMeta(c, backupMeta, u); err != nil { return errors.Trace(err) } if client.IsRawKvMode() { return errors.Annotate(berrors.ErrRestoreModeMismatch, "cannot do transactional restore from raw kv data") } - + if err = CheckRestoreDBAndTable(client, cfg); err != nil { + return err + } files, tables, dbs := filterRestoreFiles(client, cfg) if len(dbs) == 0 && len(tables) != 0 { return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases") @@ -439,7 +472,6 @@ func filterRestoreFiles( if !cfg.TableFilter.MatchTable(db.Info.Name.O, table.Info.Name.O) { continue } - if !createdDatabase { dbs = append(dbs, db) createdDatabase = true diff --git a/pkg/task/restore_raw.go b/pkg/task/restore_raw.go index 2b9225a07..04621cbe2 100644 --- a/pkg/task/restore_raw.go +++ b/pkg/task/restore_raw.go @@ -92,8 +92,8 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR if err != nil { return errors.Trace(err) } - g.Record("Size", utils.ArchiveSize(backupMeta)) - if err = client.InitBackupMeta(backupMeta, u); err != nil { + g.Record(summary.RestoreDataSize, utils.ArchiveSize(backupMeta)) + if err = client.InitBackupMeta(c, backupMeta, u); err != nil { return errors.Trace(err) } diff --git a/pkg/utils/permission.go b/pkg/utils/permission.go new file mode 100644 index 000000000..e18c28dbb --- /dev/null +++ b/pkg/utils/permission.go @@ -0,0 +1,20 @@ +package utils + +import "strings" + +var ( + ioNotFoundMsg = "notfound" + permissionDeniedMsg = "permissiondenied" +) + +// MessageIsNotFoundStorageError checks whether the message returned from TiKV is a "NotFound" storage I/O error +func MessageIsNotFoundStorageError(msg string) bool { + msgLower := strings.ToLower(msg) + return strings.Contains(msgLower, "io") && strings.Contains(msgLower, ioNotFoundMsg) +} + +// MessageIsPermissionDeniedStorageError checks whether the message returned from TiKV is a "PermissionDenied" storage I/O error +func MessageIsPermissionDeniedStorageError(msg string) bool { + msgLower := strings.ToLower(msg) + return strings.Contains(msgLower, permissionDeniedMsg) +} diff --git a/tests/_utils/run_services b/tests/_utils/run_services index ad39b2f77..ed568a955 100644 --- a/tests/_utils/run_services +++ b/tests/_utils/run_services @@ -122,6 +122,10 @@ kv_outage() { fi start_tikv "$i" done + # let tikv start up completely in case the backup finished before tikv restarted + ensure_tikv + # sometimes even though a tikv node is stopped, pd still shows is_initialized in ensure_tikv + sleep 1 } } diff --git a/tests/br_clustered_index/run.sh b/tests/br_clustered_index/run.sh index 5c065f8a7..86e5ce263 100755 --- a/tests/br_clustered_index/run.sh +++ b/tests/br_clustered_index/run.sh @@
-19,6 +19,8 @@ TABLE="usertable" run_sql "CREATE DATABASE $DB;" +table_names=${cases:-'t0 t1 t2 t_bit t_bool t_tinyint t_smallint t_mediumint t_int t_date t_time t_datetime t_timestamp t_year t_char t_varcher t_text t_binary t_blob t_enum t_set t8 t9 t10 t11 t12'} + run_sql " USE $DB; @@ -165,6 +167,13 @@ clustered_table_count=$(run_sql "\ echo "backup start..." run_br --pd $PD_ADDR backup db -s "local://$TEST_DIR/$DB" --db $DB --ratelimit 5 --concurrency 4 +# count +echo "count rows..." +row_counts=() +for table_name in $table_names; do + row_counts+=($(run_sql "SELECT COUNT(*) FROM $DB.$table_name;" | awk '/COUNT/{print $2}')) +done + run_sql "DROP DATABASE $DB;" run_sql "CREATE DATABASE $DB;" @@ -172,4 +181,16 @@ run_sql "CREATE DATABASE $DB;" echo "restore start..." run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR +# check count +echo "check count..." +idx=0 +for table_name in $table_names; do + row_count=$(run_sql "SELECT COUNT(*) FROM $DB.$table_name;" | awk '/COUNT/{print $2}') + if [[ $row_count -ne ${row_counts[$idx]} ]]; then + echo "Lost some rows in table $table_name. Expect ${row_counts[$idx]}; Get $row_count." + exit 1 + fi + idx=$(( $idx + 1 )) +done + run_sql "DROP DATABASE $DB;" diff --git a/tests/br_key_locked/locker.go b/tests/br_key_locked/locker.go index 8748f35bd..3e2d82d57 100644 --- a/tests/br_key_locked/locker.go +++ b/tests/br_key_locked/locker.go @@ -322,7 +322,7 @@ func (c *Locker) lockBatch(ctx context.Context, keys [][]byte, primary []byte) ( return 0, errors.Trace(err) } if regionErr != nil { - err = bo.Backoff(tikv.BoRegionMiss, errors.New(regionErr.String())) + err = bo.Backoff(tikv.BoRegionMiss(), errors.New(regionErr.String())) if err != nil { return 0, errors.Trace(err) } diff --git a/tests/br_log_test/run.sh b/tests/br_log_test/run.sh new file mode 100644 index 000000000..3a2643a15 --- /dev/null +++ b/tests/br_log_test/run.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# +# Copyright 2019 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu +DB="$TEST_NAME" +TABLE="usertable" +DB_COUNT=3 + +for i in $(seq $DB_COUNT); do + run_sql "CREATE DATABASE $DB${i};" + go-ycsb load mysql -P tests/$TEST_NAME/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB${i} +done + +for i in $(seq $DB_COUNT); do + row_count_ori[${i}]=$(run_sql "SELECT COUNT(*) FROM $DB${i}.$TABLE;" | awk '/COUNT/{print $2}') +done + +echo "backup with tikv permission error start..." +export GO_FAILPOINTS="github.com/pingcap/br/pkg/backup/tikv-rw-error=return(\"Io(Os { code: 13, kind: PermissionDenied...})\")" +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB-tikverr" || echo "br log test done!" +export GO_FAILPOINTS="" + +echo "backup with tikv file or directory not found error start..." +export GO_FAILPOINTS="github.com/pingcap/br/pkg/backup/tikv-rw-error=return(\"Io(Os { code: 2, kind:NotFound...})\")" +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB-tikverr2" || echo "br log test done!" 
+export GO_FAILPOINTS="" + + +for i in $(seq $DB_COUNT); do + run_sql "DROP DATABASE $DB${i};" +done diff --git a/tests/br_log_test/workload b/tests/br_log_test/workload new file mode 100644 index 000000000..664fe7ee8 --- /dev/null +++ b/tests/br_log_test/workload @@ -0,0 +1,12 @@ +recordcount=1000 +operationcount=0 +workload=core + +readallfields=true + +readproportion=0 +updateproportion=0 +scanproportion=0 +insertproportion=0 + +requestdistribution=uniform diff --git a/tests/br_systables/run.sh b/tests/br_systables/run.sh index 250c1ec44..94ee6d7a7 100644 --- a/tests/br_systables/run.sh +++ b/tests/br_systables/run.sh @@ -36,8 +36,9 @@ rollback_modify() { check() { run_sql "SELECT count(*) from mysql.foo;" | grep 11 run_sql "SELECT count(*) from mysql.usertable;" | grep 1000 - run_sql "SHOW TABLES IN mysql;" | grep -v bar - run_sql "SELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'" | grep "1h" + run_sql "SHOW TABLES IN mysql;" | awk '/bar/{exit 1}' + # we cannot let user overwrite `mysql.tidb` through br in any time. + run_sql "SELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'" | awk '/1h/{exit 1}' # TODO remove this after supporting auto flush. run_sql "FLUSH PRIVILEGES;" diff --git a/tests/lightning_alter_random/run.sh b/tests/lightning_alter_random/run.sh index 79fd2735c..b2141ab40 100644 --- a/tests/lightning_alter_random/run.sh +++ b/tests/lightning_alter_random/run.sh @@ -18,6 +18,9 @@ set -eu # FIXME: auto-random is only stable on master currently. check_cluster_version 4 0 0 AUTO_RANDOM || exit 0 +# test lightning with autocommit disabled +run_sql "SET @@global.autocommit = '0';" + for backend in tidb importer local; do if [ "$backend" = 'local' ]; then check_cluster_version 4 0 0 'local backend' || continue @@ -35,7 +38,7 @@ for backend in tidb importer local; do check_contains 'inc: 3' # auto random base is 4 - run_sql "INSERT INTO alter_random.t VALUES ();" + run_sql "INSERT INTO alter_random.t VALUES ();commit;" run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM alter_random.t" if [ "$backend" = 'tidb' ]; then check_contains 'inc: 30002' @@ -43,3 +46,5 @@ for backend in tidb importer local; do check_contains 'inc: 4' fi done + +run_sql "SET @@global.autocommit = '1';" diff --git a/tests/lightning_checkpoint/run.sh b/tests/lightning_checkpoint/run.sh index d2289b414..f4bbede37 100755 --- a/tests/lightning_checkpoint/run.sh +++ b/tests/lightning_checkpoint/run.sh @@ -110,7 +110,7 @@ for BACKEND in importer local; do run_lightning -d "$DBPATH" --backend $BACKEND --enable-checkpoint=1 run_sql "$PARTIAL_IMPORT_QUERY" check_contains "s: $(( (1000 * $CHUNK_COUNT + 1001) * $CHUNK_COUNT * $TABLE_COUNT ))" - run_sql 'SELECT count(*) FROM `tidb_lightning_checkpoint_test_cppk.1357924680.bak`.table_v6 WHERE status >= 200' + run_sql 'SELECT count(*) FROM `tidb_lightning_checkpoint_test_cppk.1357924680.bak`.table_v7 WHERE status >= 200' check_contains "count(*): $TABLE_COUNT" # Ensure there is no dangling open engines diff --git a/tests/lightning_checkpoint_chunks/run.sh b/tests/lightning_checkpoint_chunks/run.sh index f7b7cb92e..d06adfd9b 100755 --- a/tests/lightning_checkpoint_chunks/run.sh +++ b/tests/lightning_checkpoint_chunks/run.sh @@ -32,7 +32,7 @@ verify_checkpoint_noop() { run_sql 'SELECT count(i), sum(i) FROM cpch_tsr.tbl;' check_contains "count(i): $(($ROW_COUNT*$CHUNK_COUNT))" check_contains "sum(i): $(( $ROW_COUNT*$CHUNK_COUNT*(($CHUNK_COUNT+2)*$ROW_COUNT + 1)/2 ))" - 
run_sql 'SELECT count(*) FROM `tidb_lightning_checkpoint_test_cpch.1234567890.bak`.table_v6 WHERE status >= 200' + run_sql 'SELECT count(*) FROM `tidb_lightning_checkpoint_test_cpch.1234567890.bak`.table_v7 WHERE status >= 200' check_contains "count(*): 1" } diff --git a/tests/lightning_checkpoint_parquet/run.sh b/tests/lightning_checkpoint_parquet/run.sh index 9f1004095..31666bd3b 100755 --- a/tests/lightning_checkpoint_parquet/run.sh +++ b/tests/lightning_checkpoint_parquet/run.sh @@ -41,9 +41,9 @@ set +e run_lightning -d "$DBPATH" --backend tidb --enable-checkpoint=1 2> /dev/null set -e run_sql 'SELECT count(*), sum(iVal) FROM `cppq_tsr`.tbl' -check_contains "count(*): 32" -# sum(0..31) -check_contains "sum(iVal): 496" +check_contains "count(*): 1" +# sum(0) +check_contains "sum(iVal): 0" # check chunk offset and update checkpoint current row id to a higher value so that # if parse read from start, the generated rows will be different diff --git a/tests/lightning_error_summary/data/error_summary.c.sql b/tests/lightning_error_summary/data/error_summary.c.sql index be11c04ab..4ed9e54a4 100644 --- a/tests/lightning_error_summary/data/error_summary.c.sql +++ b/tests/lightning_error_summary/data/error_summary.c.sql @@ -1 +1 @@ -INSERT INTO c VALUES (10, 100), (1000, 10000); +INSERT INTO c VALUES (3, 100), (1000, 10000); diff --git a/tests/lightning_incremental/config.toml b/tests/lightning_incremental/config.toml new file mode 100644 index 000000000..e69de29bb diff --git a/tests/lightning_incremental/data/incr-schema-create.sql b/tests/lightning_incremental/data/incr-schema-create.sql new file mode 100644 index 000000000..624892540 --- /dev/null +++ b/tests/lightning_incremental/data/incr-schema-create.sql @@ -0,0 +1 @@ +create database `incr`; diff --git a/tests/lightning_incremental/data/incr.auto_random-schema.sql b/tests/lightning_incremental/data/incr.auto_random-schema.sql new file mode 100644 index 000000000..028c7c9d9 --- /dev/null +++ b/tests/lightning_incremental/data/incr.auto_random-schema.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +CREATE TABLE `auto_random` ( + `id` bigint primary key clustered auto_random, + v varchar(255) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data/incr.auto_random.sql b/tests/lightning_incremental/data/incr.auto_random.sql new file mode 100644 index 000000000..d4357822b --- /dev/null +++ b/tests/lightning_incremental/data/incr.auto_random.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `auto_random` (`v`) VALUES +("a"), +("b"), +("c"); diff --git a/tests/lightning_incremental/data/incr.pk_auto_inc-schema.sql b/tests/lightning_incremental/data/incr.pk_auto_inc-schema.sql new file mode 100644 index 000000000..52e876978 --- /dev/null +++ b/tests/lightning_incremental/data/incr.pk_auto_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `auto_random` ( + `id` bigint PRIMARY KEY AUTO_INCREMENT, + v varchar(255) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data/incr.pk_auto_inc.sql b/tests/lightning_incremental/data/incr.pk_auto_inc.sql new file mode 100644 index 000000000..ac85444a5 --- /dev/null +++ b/tests/lightning_incremental/data/incr.pk_auto_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `pk_auto_inc` (`v`) VALUES +("a"), +("b"), +("c"); diff --git a/tests/lightning_incremental/data/incr.rowid_uk_inc-schema.sql b/tests/lightning_incremental/data/incr.rowid_uk_inc-schema.sql new file mode 100644 
index 000000000..c1ace8ba9 --- /dev/null +++ b/tests/lightning_incremental/data/incr.rowid_uk_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `rowid_uk_inc` ( + `id` bigint UNIQUE KEY AUTO_INCREMENT, + v varchar(16) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data/incr.rowid_uk_inc.sql b/tests/lightning_incremental/data/incr.rowid_uk_inc.sql new file mode 100644 index 000000000..b90acb9b1 --- /dev/null +++ b/tests/lightning_incremental/data/incr.rowid_uk_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `rowid_uk_inc` (`v`) VALUES +('a'), +('b'), +('c'); diff --git a/tests/lightning_incremental/data/incr.uk_auto_inc-schema.sql b/tests/lightning_incremental/data/incr.uk_auto_inc-schema.sql new file mode 100644 index 000000000..3901d7ed3 --- /dev/null +++ b/tests/lightning_incremental/data/incr.uk_auto_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `uk_auto_inc` ( + `pk` int PRIMARY KEY, + `id` bigint UNIQUE KEY AUTO_INCREMENT +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data/incr.uk_auto_inc.sql b/tests/lightning_incremental/data/incr.uk_auto_inc.sql new file mode 100644 index 000000000..4b1e7b134 --- /dev/null +++ b/tests/lightning_incremental/data/incr.uk_auto_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `uk_auto_inc` (`pk`) VALUES +(1), +(2), +(3); diff --git a/tests/lightning_incremental/data1/incr-schema-create.sql b/tests/lightning_incremental/data1/incr-schema-create.sql new file mode 100644 index 000000000..624892540 --- /dev/null +++ b/tests/lightning_incremental/data1/incr-schema-create.sql @@ -0,0 +1 @@ +create database `incr`; diff --git a/tests/lightning_incremental/data1/incr.auto_random-schema.sql b/tests/lightning_incremental/data1/incr.auto_random-schema.sql new file mode 100644 index 000000000..028c7c9d9 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.auto_random-schema.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +CREATE TABLE `auto_random` ( + `id` bigint primary key clustered auto_random, + v varchar(255) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data1/incr.auto_random.sql b/tests/lightning_incremental/data1/incr.auto_random.sql new file mode 100644 index 000000000..7e89d09b5 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.auto_random.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `auto_random` (`v`) VALUES +("d"), +("e"), +("f"); diff --git a/tests/lightning_incremental/data1/incr.pk_auto_inc-schema.sql b/tests/lightning_incremental/data1/incr.pk_auto_inc-schema.sql new file mode 100644 index 000000000..52e876978 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.pk_auto_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `auto_random` ( + `id` bigint PRIMARY KEY AUTO_INCREMENT, + v varchar(255) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data1/incr.pk_auto_inc.sql b/tests/lightning_incremental/data1/incr.pk_auto_inc.sql new file mode 100644 index 000000000..5a0ab087d --- /dev/null +++ b/tests/lightning_incremental/data1/incr.pk_auto_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `pk_auto_inc` (`v`) VALUES +("d"), +("e"), +("f"); diff --git a/tests/lightning_incremental/data1/incr.rowid_uk_inc-schema.sql b/tests/lightning_incremental/data1/incr.rowid_uk_inc-schema.sql new file mode 100644 index 000000000..c9bc49801 --- /dev/null 
+++ b/tests/lightning_incremental/data1/incr.rowid_uk_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `uk_auto_inc` ( + `id` bigint UNIQUE KEY AUTO_INCREMENT, + v varchar(16) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data1/incr.rowid_uk_inc.sql b/tests/lightning_incremental/data1/incr.rowid_uk_inc.sql new file mode 100644 index 000000000..f4ab9a5a7 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.rowid_uk_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `rowid_uk_inc` (`v`) VALUES +("d"), +("e"), +("f"); diff --git a/tests/lightning_incremental/data1/incr.uk_auto_inc-schema.sql b/tests/lightning_incremental/data1/incr.uk_auto_inc-schema.sql new file mode 100644 index 000000000..3901d7ed3 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.uk_auto_inc-schema.sql @@ -0,0 +1,4 @@ +CREATE TABLE `uk_auto_inc` ( + `pk` int PRIMARY KEY, + `id` bigint UNIQUE KEY AUTO_INCREMENT +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/lightning_incremental/data1/incr.uk_auto_inc.sql b/tests/lightning_incremental/data1/incr.uk_auto_inc.sql new file mode 100644 index 000000000..31d87c135 --- /dev/null +++ b/tests/lightning_incremental/data1/incr.uk_auto_inc.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `uk_auto_inc` (`pk`) VALUES +(4), +(5), +(6); diff --git a/tests/lightning_incremental/run.sh b/tests/lightning_incremental/run.sh new file mode 100644 index 000000000..bf8ccde57 --- /dev/null +++ b/tests/lightning_incremental/run.sh @@ -0,0 +1,76 @@ +#!/bin/sh +# +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -eu + +check_cluster_version 4 0 0 "incremental restore" || exit 0 + +DB_NAME=incr + +for backend in importer local; do + run_sql "DROP DATABASE IF EXISTS incr;" + run_lightning --backend $backend + + for tbl in auto_random pk_auto_inc rowid_uk_inc uk_auto_inc; do + run_sql "SELECT count(*) from incr.$tbl" + check_contains "count(*): 3" + done + + for tbl in auto_random pk_auto_inc rowid_uk_inc uk_auto_inc; do + if [ "$tbl" = "auto_random" ]; then + run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM incr.$tbl" + else + run_sql "SELECT id as inc FROM incr.$tbl" + fi + check_contains 'inc: 1' + check_contains 'inc: 2' + check_contains 'inc: 3' + done + + for tbl in pk_auto_inc rowid_uk_inc; do + run_sql "SELECT group_concat(v) from incr.$tbl group by 'all';" + check_contains "group_concat(v): a,b,c" + done + + run_sql "SELECT sum(pk) from incr.uk_auto_inc;" + check_contains "sum(pk): 6" + + # incrementally import all data in data1 + run_lightning --backend $backend -d "tests/$TEST_NAME/data1" + + for tbl in auto_random pk_auto_inc rowid_uk_inc uk_auto_inc; do + run_sql "SELECT count(*) from incr.$tbl" + check_contains "count(*): 6" + done + + for tbl in auto_random pk_auto_inc rowid_uk_inc uk_auto_inc; do + if [ "$tbl" = "auto_random" ]; then + run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM incr.$tbl" + else + run_sql "SELECT id as inc FROM incr.$tbl" + fi + check_contains 'inc: 4' + check_contains 'inc: 5' + check_contains 'inc: 6' + done + + for tbl in pk_auto_inc rowid_uk_inc; do + run_sql "SELECT group_concat(v) from incr.$tbl group by 'all';" + check_contains "group_concat(v): a,b,c,d,e,f" + done + + run_sql "SELECT sum(pk) from incr.uk_auto_inc;" + check_contains "sum(pk): 21" +done diff --git a/tests/lightning_local_backend/run.sh b/tests/lightning_local_backend/run.sh index e43fd04fe..cfecd3e72 100755 --- a/tests/lightning_local_backend/run.sh +++ b/tests/lightning_local_backend/run.sh @@ -58,7 +58,7 @@ run_sql 'DROP DATABASE cpeng;' rm -f "/tmp/tidb_lightning_checkpoint_local_backend_test.pb" set +e -export GO_FAILPOINTS='github.com/pingcap/br/pkg/lightning/restore/FailBeforeDataEngineImported=return' +export GO_FAILPOINTS='github.com/pingcap/br/pkg/lightning/restore/FailIfStatusBecomes=return(90);' for i in $(seq "$ENGINE_COUNT"); do echo "******** Importing Table Now (step $i/$ENGINE_COUNT) ********" run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" diff --git a/tests/lightning_tidb_rowid/run.sh b/tests/lightning_tidb_rowid/run.sh index 4397c2679..395c21978 100755 --- a/tests/lightning_tidb_rowid/run.sh +++ b/tests/lightning_tidb_rowid/run.sh @@ -57,13 +57,8 @@ for BACKEND in local importer tidb; do run_sql 'SELECT count(*), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.pre_rebase' check_contains 'count(*): 1' - if [ "$BACKEND" == 'tidb' ]; then - check_contains 'min(_tidb_rowid): 70000' - check_contains 'max(_tidb_rowid): 70000' - else - check_contains 'min(_tidb_rowid): 1' - check_contains 'max(_tidb_rowid): 1' - fi + check_contains 'min(_tidb_rowid): 70000' + check_contains 'max(_tidb_rowid): 70000' run_sql 'INSERT INTO rowid.pre_rebase VALUES ("?")' run_sql 'SELECT _tidb_rowid > 70000 FROM rowid.pre_rebase WHERE pk = "?"' check_contains '_tidb_rowid > 70000: 1' diff --git a/tests/run.sh b/tests/run.sh index e2b9cf307..be8c1b57e 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -41,8 +41,6 
@@ fi echo "selected test cases: $SELECTED_TEST_NAME" -# disable cluster index by default -run_sql 'set @@global.tidb_enable_clustered_index = 0' || echo "tidb does not support cluster index yet, skipped!" # wait for global variable cache invalid sleep 2 @@ -62,5 +60,5 @@ for casename in $SELECTED_TEST_NAME; do TIDB_STATUS_ADDR="$TIDB_STATUS_ADDR" \ TIKV_ADDR="$TIKV_ADDR" \ BR_LOG_TO_TERM=1 \ - bash "$script" && echo "TEST: [$TEST_NAME] success!" + bash "$script" && echo "TEST: [$casename] success!" done diff --git a/tests/run_compatible.sh b/tests/run_compatible.sh index 98086fa23..9fcb2446c 100755 --- a/tests/run_compatible.sh +++ b/tests/run_compatible.sh @@ -19,7 +19,8 @@ set -eu -source ${BASH_SOURCE[0]%/*}/../compatibility/prepare_backup.sh +source ${BASH_SOURCE[0]%/*}/../compatibility/get_last_tags.sh +getLatestTags echo "start test on $TAGS" EXPECTED_KVS=1000 diff --git a/tests/up.sh b/tests/up.sh index 92b9eb8bf..b7b17558f 100755 --- a/tests/up.sh +++ b/tests/up.sh @@ -117,14 +117,14 @@ FROM minio/minio AS minio-builder FROM minio/mc AS mc-builder FROM fsouza/fake-gcs-server AS gcs-builder -FROM golang:1.13.8-buster as ycsb-builder +FROM golang:1.16.4-buster as ycsb-builder WORKDIR /go/src/github.com/pingcap/ RUN git clone https://github.com/pingcap/go-ycsb.git && \ cd go-ycsb && \ make && \ cp bin/go-ycsb /go-ycsb -FROM golang:1.13.8-buster +FROM golang:1.16.4-buster RUN apt-get update && apt-get install -y --no-install-recommends \ git \ diff --git a/tidb-lightning.toml b/tidb-lightning.toml index 68482b4fb..c019a1265 100644 --- a/tidb-lightning.toml +++ b/tidb-lightning.toml @@ -32,6 +32,10 @@ table-concurrency = 6 # adjusted according to monitoring. # Ref: https://en.wikipedia.org/wiki/Disk_buffer#Read-ahead/read-behind # io-concurrency = 5 +# meta-schema-name is (database name) to store lightning task and table metadata. +# the meta schema and tables is store in target tidb cluster. +# this config is only used in "local" and "importer" backend. +# meta-schema-name = "lightning_metadata" # logging level = "info" diff --git a/tools/go.mod b/tools/go.mod index 5bc9400f2..9596ffe34 100644 --- a/tools/go.mod +++ b/tools/go.mod @@ -1,6 +1,6 @@ module github.com/pingcap/br/_tools -go 1.13 +go 1.16 require ( github.com/dnephin/govet v0.0.0-20171012192244-4a96d43e39d3 diff --git a/web/go.mod b/web/go.mod index 081344948..01c3f3f65 100644 --- a/web/go.mod +++ b/web/go.mod @@ -2,4 +2,4 @@ module github.com/pingcap/br/pkg/lightning/web -go 1.13 +go 1.16