From 80d18b6efb45de297258e566adf9de5d80e8fd24 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Wed, 1 Dec 2021 16:25:54 +0800 Subject: [PATCH] load data: fix bug if load data with long content (#29222) --- executor/load_data.go | 15 ++++++--------- executor/write_test.go | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/executor/load_data.go b/executor/load_data.go index 03539a4f8c9a9..1202675ebbce0 100644 --- a/executor/load_data.go +++ b/executor/load_data.go @@ -419,9 +419,9 @@ func (e *LoadDataInfo) isInQuoter(bs []byte) bool { return inQuoter } -// indexOfTerminator return index of terminator, if not, return -1. +// IndexOfTerminator return index of terminator, if not, return -1. // normally, the field terminator and line terminator is short, so we just use brute force algorithm. -func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int { +func (e *LoadDataInfo) IndexOfTerminator(bs []byte, inQuoter bool) int { fieldTerm := []byte(e.FieldsInfo.Terminated) fieldTermLen := len(fieldTerm) lineTerm := []byte(e.LinesInfo.Terminated) @@ -459,13 +459,10 @@ func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int { } } atFieldStart := true - inQuoter := false loop: for i := 0; i < len(bs); i++ { if atFieldStart && bs[i] == e.FieldsInfo.Enclosed { - if !isInQuoter { - inQuoter = true - } + inQuoter = !inQuoter atFieldStart = false continue } @@ -525,7 +522,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(curData[curStartIdx:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(curData[curStartIdx:], inquotor) + endIdx = e.IndexOfTerminator(curData[curStartIdx:], inquotor) } } if endIdx == -1 { @@ -539,7 +536,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(curData[startingLen:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(curData[startingLen:], inquotor) + endIdx = e.IndexOfTerminator(curData[startingLen:], inquotor) } if endIdx != -1 { nextDataIdx := startingLen + endIdx + terminatedLen @@ -560,7 +557,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(prevData[startingLen:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(prevData[startingLen:], inquotor) + endIdx = e.IndexOfTerminator(prevData[startingLen:], inquotor) } if endIdx >= prevLen { return prevData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true diff --git a/executor/write_test.go b/executor/write_test.go index 879c8c76c035f..d30c1f19ba8d2 100644 --- a/executor/write_test.go +++ b/executor/write_test.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/planner/core" @@ -2129,6 +2130,27 @@ func TestLoadDataEscape(t *testing.T) { checkCases(tests, ld, t, tk, ctx, selectSQL, deleteSQL) } +func TestLoadDataWithLongContent(t *testing.T) { + e := &executor.LoadDataInfo{ + FieldsInfo: &ast.FieldsClause{Terminated: ",", Escaped: '\\', Enclosed: '"'}, + LinesInfo: &ast.LinesClause{Terminated: "\n"}, + } + tests := []struct { + content string + inQuoter bool + expectedIndex int + }{ + {"123,123\n123,123", false, 7}, + {"123123\\n123123", false, -1}, + {"123123\n123123", true, -1}, + {"123123\n123123\"\n", true, 14}, + } + + for _, tt := range tests { + require.Equal(t, tt.expectedIndex, e.IndexOfTerminator([]byte(tt.content), tt.inQuoter)) + } +} + // TestLoadDataSpecifiedColumns reuse TestLoadDataEscape's test case :-) func TestLoadDataSpecifiedColumns(t *testing.T) { trivialMsg := "Records: 1 Deleted: 0 Skipped: 0 Warnings: 0"