Skip to content

Commit

Permalink
Merge #97545
Browse files Browse the repository at this point in the history
97545: importer: fix scanning of newlines in enclosed fields for DELIMITED DATA r=rafiss a=andyyang890

Previously, when using `IMPORT INTO ... DELIMITED DATA`, unescaped
newlines within quoted fields were treated as row terminators instead
of as part of the field. This patch refactors the scanning logic to
correctly interpret these newlines.

Fixes #95906

Release note (bug fix): `IMPORT INTO ... DELIMITED DATA` will now
correctly handle quoted fields that contain unescaped newlines.

Co-authored-by: Andy Yang <[email protected]>
  • Loading branch information
craig[bot] and andyyang890 committed Mar 1, 2023
2 parents 591de60 + 748a496 commit 65752af
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 25 deletions.
10 changes: 10 additions & 0 deletions pkg/sql/importer/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ ORDER BY table_name
`SELECT * from t`: {{"foo", "normal"}},
},
},
{
name: "unescaped newline in quoted field",
create: `a string, b string`,
with: `WITH fields_enclosed_by = '$'`,
typ: "DELIMITED",
data: "foo\t$foo\nbar$\nfoo\tbar",
query: map[string][][]string{
`SELECT * FROM t`: {{"foo", "foo\nbar"}, {"foo", "bar"}},
},
},
{
name: "field enclosure in middle of unquoted field",
create: `a string, b string`,
Expand Down
57 changes: 32 additions & 25 deletions pkg/sql/importer/read_import_mysqlout.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ type delimitedProducer struct {
reader *bufio.Reader
row []rune
err error
eof bool
}

var _ importRowProducer = &delimitedProducer{}
Expand All @@ -94,26 +93,23 @@ func (d *delimitedProducer) Scan() bool {
d.row = nil
var r rune
var w int
nextLiteral := false
fieldEnclosed := false
// gotEncloser represents whether the previous character we scanned is the
// encloser (the character used for beginning and ending quoted fields).
var gotEncloser bool
// inEnclosedField represents whether we have started scanning an enclosed
// field (more specific than inField).
var inEnclosedField bool
// inEscapeSeq represents whether we have started scanning an escape sequence.
var inEscapeSeq bool
// inField represents whether we have started scanning a field.
var inField bool

for {
r, w, d.err = d.reader.ReadRune()
if d.err == io.EOF {
d.eof = true
d.err = nil
return d.row != nil
}

if d.eof {
if d.row != nil {
return true
}
if nextLiteral {
d.err = io.ErrUnexpectedEOF
}
return false
}

if d.err != nil {
return false
}
Expand All @@ -130,23 +126,34 @@ func (d *delimitedProducer) Scan() bool {
r = rune(raw)
}

if r == d.opts.RowSeparator && !nextLiteral && !fieldEnclosed {
if d.opts.Enclose != roachpb.MySQLOutfileOptions_Never {
// We only care about well-formed, enclosed fields (i.e. fields that
// start and end with the encloser rune with no additional runes either
// before or after the field). More precisely: 1) an encloser only
// starts a field if it is at the start of a row or immediately follows
// a field terminator and 2) an encloser only ends a field if it is
// immediately followed by the field terminator or row terminator rune.
// We let FillDatums take care of reporting and handling any errors.
if inEnclosedField && gotEncloser && (r == d.opts.FieldSeparator || r == d.opts.RowSeparator) {
inEnclosedField = false
}
gotEncloser = r == d.opts.Encloser
if gotEncloser && !inField {
inEnclosedField = true
}
}

if r == d.opts.RowSeparator && !inEscapeSeq && !inEnclosedField {
return true
}

d.row = append(d.row, r)
inField = r != d.opts.FieldSeparator

if d.opts.HasEscape {
nextLiteral = !nextLiteral && r == d.opts.Escape
inEscapeSeq = !inEscapeSeq && r == d.opts.Escape
}

if d.opts.Enclose != roachpb.MySQLOutfileOptions_Never && r == d.opts.Encloser {
// We only care about well-formed, enclosed fields (i.e. ones that start with
// the enclose rune). If we see the enclose character anywhere else, then we
// either close the opened enclosing, or we treat this as an invalid enclosing,
// and let FillDatums below take care of reporting and handling any errors.
fieldEnclosed = len(d.row) == 1
}
d.row = append(d.row, r)
}
}

Expand Down

0 comments on commit 65752af

Please sign in to comment.