From 69c4d71fe71671a366695f683e0fa3bd7ed05fc8 Mon Sep 17 00:00:00 2001 From: dsdashun Date: Sun, 29 Jan 2023 11:04:08 +0800 Subject: [PATCH] lightning: support files with BOM header --- br/pkg/lightning/mydump/parser.go | 9 ++++- br/pkg/lightning/mydump/reader.go | 3 +- br/tests/lightning_bom_file/config.toml | 2 + .../data/mytest.testtbl-schema.sql | 5 +++ .../data/mytest.testtbl.csv | 6 +++ br/tests/lightning_bom_file/run.sh | 38 +++++++++++++++++++ go.mod | 1 + go.sum | 2 + 8 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 br/tests/lightning_bom_file/config.toml create mode 100644 br/tests/lightning_bom_file/data/mytest.testtbl-schema.sql create mode 100644 br/tests/lightning_bom_file/data/mytest.testtbl.csv create mode 100755 br/tests/lightning_bom_file/run.sh diff --git a/br/pkg/lightning/mydump/parser.go b/br/pkg/lightning/mydump/parser.go index 512c3789cfa7f..0ac82ce189d71 100644 --- a/br/pkg/lightning/mydump/parser.go +++ b/br/pkg/lightning/mydump/parser.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/worker" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/types" + "github.com/spkg/bom" "go.uber.org/zap" "go.uber.org/zap/zapcore" ) @@ -285,7 +286,13 @@ func (parser *blockParser) readBlock() error { parser.remainBuf.Write(parser.buf) parser.appendBuf.Reset() parser.appendBuf.Write(parser.remainBuf.Bytes()) - parser.appendBuf.Write(parser.blockBuf[:n]) + blockData := parser.blockBuf[:n] + if parser.pos == 0 { + bomCleanedData := bom.Clean(blockData) + parser.pos += int64(n - len(bomCleanedData)) + blockData = bomCleanedData + } + parser.appendBuf.Write(blockData) parser.buf = parser.appendBuf.Bytes() if parser.metrics != nil { parser.metrics.ChunkParserReadBlockSecondsHistogram.Observe(time.Since(startTime).Seconds()) diff --git a/br/pkg/lightning/mydump/reader.go b/br/pkg/lightning/mydump/reader.go index 4837b35aceab2..3735e97cb48ee 100644 --- a/br/pkg/lightning/mydump/reader.go +++ b/br/pkg/lightning/mydump/reader.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/worker" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/spkg/bom" "go.uber.org/zap" "golang.org/x/text/encoding/simplifiedchinese" ) @@ -83,7 +84,7 @@ func ExportStatement(ctx context.Context, store storage.ExternalStorage, sqlFile } defer fd.Close() - br := bufio.NewReader(fd) + br := bufio.NewReader(bom.NewReader(fd)) data := make([]byte, 0, sqlFile.FileMeta.FileSize+1) buffer := make([]byte, 0, sqlFile.FileMeta.FileSize+1) diff --git a/br/tests/lightning_bom_file/config.toml b/br/tests/lightning_bom_file/config.toml new file mode 100644 index 0000000000000..291d1b166103a --- /dev/null +++ b/br/tests/lightning_bom_file/config.toml @@ -0,0 +1,2 @@ +[mydumper.csv] +header = true diff --git a/br/tests/lightning_bom_file/data/mytest.testtbl-schema.sql b/br/tests/lightning_bom_file/data/mytest.testtbl-schema.sql new file mode 100644 index 0000000000000..4232788898790 --- /dev/null +++ b/br/tests/lightning_bom_file/data/mytest.testtbl-schema.sql @@ -0,0 +1,5 @@ +CREATE TABLE testtbl ( + id INTEGER, + val1 VARCHAR(40) NOT NULL, + INDEX `idx_val1` (`val1`) +); diff --git a/br/tests/lightning_bom_file/data/mytest.testtbl.csv b/br/tests/lightning_bom_file/data/mytest.testtbl.csv new file mode 100644 index 0000000000000..e0931cce2a480 --- /dev/null +++ b/br/tests/lightning_bom_file/data/mytest.testtbl.csv @@ -0,0 +1,6 @@ +id,val1 +1,"aaa01" +2,"aaa01" +3,"aaa02" +4,"aaa02" +5,"aaa05" diff --git a/br/tests/lightning_bom_file/run.sh b/br/tests/lightning_bom_file/run.sh new file mode 100755 index 0000000000000..9201209c47018 --- /dev/null +++ b/br/tests/lightning_bom_file/run.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# Copyright 2023 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +mydir=$(dirname "${BASH_SOURCE[0]}") + +data_file="${mydir}/data/mytest.testtbl.csv" + +row_count=$( sed '1d' "${data_file}" | wc -l | xargs echo ) + +run_lightning --backend tidb + +# Check that everything is correctly imported +run_sql 'SELECT count(*) FROM mytest.testtbl' +check_contains "count(*): ${row_count}" + +check_cluster_version 4 0 0 'local backend' || exit 0 +run_sql "DROP TABLE mytest.testtbl" + +run_lightning --backend local + +# Check that everything is correctly imported +run_sql 'SELECT count(*) FROM mytest.testtbl' +check_contains "count(*): ${row_count}" diff --git a/go.mod b/go.mod index 36b21faac466a..aa1d4fc5e01a1 100644 --- a/go.mod +++ b/go.mod @@ -87,6 +87,7 @@ require ( github.com/soheilhy/cmux v0.1.5 github.com/spf13/cobra v1.6.1 github.com/spf13/pflag v1.0.5 + github.com/spkg/bom v1.0.0 github.com/stathat/consistent v1.0.0 github.com/stretchr/testify v1.8.1 github.com/tdakkota/asciicheck v0.1.1 diff --git a/go.sum b/go.sum index 663e5008e190d..d7e5ac3a698c1 100644 --- a/go.sum +++ b/go.sum @@ -1278,6 +1278,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spkg/bom v1.0.0 h1:S939THe0ukL5WcTGiGqkgtaW5JW+O6ITaIlpJXTYY64= +github.com/spkg/bom v1.0.0/go.mod h1:lAz2VbTuYNcvs7iaFF8WW0ufXrHShJ7ck1fYFFbVXJs= github.com/stathat/consistent v1.0.0 h1:ZFJ1QTRn8npNBKW065raSZ8xfOqhpb8vLOkfp4CcL/U= github.com/stathat/consistent v1.0.0/go.mod h1:uajTPbgSygZBJ+V+0mY7meZ8i0XAcZs7AQ6V121XSxw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=