Skip to content

Commit

Permalink
load data: fix bug if load data with long content (#29222)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiongjiwei authored Dec 1, 2021
1 parent e97e8c0 commit 80d18b6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
15 changes: 6 additions & 9 deletions executor/load_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,9 @@ func (e *LoadDataInfo) isInQuoter(bs []byte) bool {
return inQuoter
}

// indexOfTerminator return index of terminator, if not, return -1.
// IndexOfTerminator returns the index of the terminator, or -1 if none is found.
// Normally the field terminator and line terminator are short, so we just use a brute-force algorithm.
func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int {
func (e *LoadDataInfo) IndexOfTerminator(bs []byte, inQuoter bool) int {
fieldTerm := []byte(e.FieldsInfo.Terminated)
fieldTermLen := len(fieldTerm)
lineTerm := []byte(e.LinesInfo.Terminated)
Expand Down Expand Up @@ -459,13 +459,10 @@ func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int {
}
}
atFieldStart := true
inQuoter := false
loop:
for i := 0; i < len(bs); i++ {
if atFieldStart && bs[i] == e.FieldsInfo.Enclosed {
if !isInQuoter {
inQuoter = true
}
inQuoter = !inQuoter
atFieldStart = false
continue
}
Expand Down Expand Up @@ -525,7 +522,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [
if ignore {
endIdx = strings.Index(string(hack.String(curData[curStartIdx:])), e.LinesInfo.Terminated)
} else {
endIdx = e.indexOfTerminator(curData[curStartIdx:], inquotor)
endIdx = e.IndexOfTerminator(curData[curStartIdx:], inquotor)
}
}
if endIdx == -1 {
Expand All @@ -539,7 +536,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [
if ignore {
endIdx = strings.Index(string(hack.String(curData[startingLen:])), e.LinesInfo.Terminated)
} else {
endIdx = e.indexOfTerminator(curData[startingLen:], inquotor)
endIdx = e.IndexOfTerminator(curData[startingLen:], inquotor)
}
if endIdx != -1 {
nextDataIdx := startingLen + endIdx + terminatedLen
Expand All @@ -560,7 +557,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [
if ignore {
endIdx = strings.Index(string(hack.String(prevData[startingLen:])), e.LinesInfo.Terminated)
} else {
endIdx = e.indexOfTerminator(prevData[startingLen:], inquotor)
endIdx = e.IndexOfTerminator(prevData[startingLen:], inquotor)
}
if endIdx >= prevLen {
return prevData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true
Expand Down
22 changes: 22 additions & 0 deletions executor/write_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/pingcap/tidb/executor"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/planner/core"
Expand Down Expand Up @@ -2129,6 +2130,27 @@ func TestLoadDataEscape(t *testing.T) {
checkCases(tests, ld, t, tk, ctx, selectSQL, deleteSQL)
}

func TestLoadDataWithLongContent(t *testing.T) {
e := &executor.LoadDataInfo{
FieldsInfo: &ast.FieldsClause{Terminated: ",", Escaped: '\\', Enclosed: '"'},
LinesInfo: &ast.LinesClause{Terminated: "\n"},
}
tests := []struct {
content string
inQuoter bool
expectedIndex int
}{
{"123,123\n123,123", false, 7},
{"123123\\n123123", false, -1},
{"123123\n123123", true, -1},
{"123123\n123123\"\n", true, 14},
}

for _, tt := range tests {
require.Equal(t, tt.expectedIndex, e.IndexOfTerminator([]byte(tt.content), tt.inQuoter))
}
}

// TestLoadDataSpecifiedColumns reuses TestLoadDataEscape's test case :-)
func TestLoadDataSpecifiedColumns(t *testing.T) {
trivialMsg := "Records: 1 Deleted: 0 Skipped: 0 Warnings: 0"
Expand Down

0 comments on commit 80d18b6

Please sign in to comment.