Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

importer: fix scanning of newlines in enclosed fields for DELIMITED DATA #97545

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/sql/importer/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ ORDER BY table_name
`SELECT * from t`: {{"foo", "normal"}},
},
},
{
name: "unescaped newline in quoted field",
create: `a string, b string`,
with: `WITH fields_enclosed_by = '$'`,
typ: "DELIMITED",
data: "foo\t$foo\nbar$\nfoo\tbar",
query: map[string][][]string{
`SELECT * FROM t`: {{"foo", "foo\nbar"}, {"foo", "bar"}},
},
},
{
name: "field enclosure in middle of unquoted field",
create: `a string, b string`,
Expand Down
57 changes: 32 additions & 25 deletions pkg/sql/importer/read_import_mysqlout.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ type delimitedProducer struct {
reader *bufio.Reader
row []rune
err error
eof bool
}

var _ importRowProducer = &delimitedProducer{}
Expand All @@ -94,26 +93,23 @@ func (d *delimitedProducer) Scan() bool {
d.row = nil
var r rune
var w int
nextLiteral := false
fieldEnclosed := false
// gotEncloser represents whether the previous character we scanned is the
// encloser (the character used for beginning and ending quoted fields).
var gotEncloser bool
// inEnclosedField represents whether we have started scanning an enclosed
// field (more specific than inField).
var inEnclosedField bool
// inEscapeSeq represents whether we have started scanning an escape sequence.
var inEscapeSeq bool
// inField represents whether we have started scanning a field.
var inField bool

for {
r, w, d.err = d.reader.ReadRune()
if d.err == io.EOF {
d.eof = true
d.err = nil
return d.row != nil
}

if d.eof {
if d.row != nil {
return true
}
if nextLiteral {
d.err = io.ErrUnexpectedEOF
}
return false
}

if d.err != nil {
return false
}
Expand All @@ -130,23 +126,34 @@ func (d *delimitedProducer) Scan() bool {
r = rune(raw)
}

if r == d.opts.RowSeparator && !nextLiteral && !fieldEnclosed {
if d.opts.Enclose != roachpb.MySQLOutfileOptions_Never {
// We only care about well-formed, enclosed fields (i.e. fields that
// start and end with the encloser rune with no additional runes either
// before or after the field). More precisely: 1) an encloser only
// starts a field if it is at the start of a row or immediately follows
// a field terminator and 2) an encloser only ends a field if it is
// immediately followed by the field terminator rune.
// We let FillDatums take care of reporting and handling any errors.
if inEnclosedField && gotEncloser && (r == d.opts.FieldSeparator || r == d.opts.RowSeparator) {
inEnclosedField = false
}
gotEncloser = r == d.opts.Encloser
if gotEncloser && !inField {
inEnclosedField = true
}
}

if r == d.opts.RowSeparator && !inEscapeSeq && !inEnclosedField {
return true
}

d.row = append(d.row, r)
inField = r != d.opts.FieldSeparator

if d.opts.HasEscape {
nextLiteral = !nextLiteral && r == d.opts.Escape
inEscapeSeq = !inEscapeSeq && r == d.opts.Escape
}

if d.opts.Enclose != roachpb.MySQLOutfileOptions_Never && r == d.opts.Encloser {
// We only care about well-formed, enclosed fields (i.e. ones that start with
// the enclose rune). If we see the enclose character anywhere else, then we
// either close the opened enclosure or treat it as an invalid enclosure,
// and let FillDatums below take care of reporting and handling any errors.
fieldEnclosed = len(d.row) == 1
}
d.row = append(d.row, r)
}
}

Expand Down