From 4606e7b7d35f919f19d23123a6f478eb67451538 Mon Sep 17 00:00:00 2001 From: Kyle Derkacz Date: Fri, 4 Mar 2016 20:39:35 -0800 Subject: [PATCH 1/3] Supporting CRLFs by checking next byte If the next byte after a carriage return (CR) is a line feed (LF), then don't replace the CR. The result will be a CRLF, which is supported by the Go CSV reader. --- reader.go | 2 +- reader_test.go | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/reader.go b/reader.go index 987a040..fc460e1 100644 --- a/reader.go +++ b/reader.go @@ -27,7 +27,7 @@ func New(r io.Reader) io.Reader { func (r reader) Read(p []byte) (n int, err error) { n, err = r.r.Read(p) for i, b := range p { - if b == rByte { + if b == rByte && i < len(p) && p[i+1] != nByte { p[i] = nByte } } diff --git a/reader_test.go b/reader_test.go index 10d6a2f..fea0c88 100644 --- a/reader_test.go +++ b/reader_test.go @@ -1,6 +1,7 @@ package macreader import ( + "testing" "bytes" "encoding/csv" "fmt" @@ -32,3 +33,60 @@ func Example() { // With macreader: [][]string{[]string{"a", "b", "c"}, []string{"1", "2", "3"}} } + + +func TestCR(t *testing.T) { + testFile := bytes.NewBufferString("a,b,c\r1,2,3\r").Bytes() + + r := csv.NewReader(New(bytes.NewReader(testFile))) + lines, err := r.ReadAll() + + if err != nil { + t.Errorf("An error occurred while reading the data: %v", err) + } + if len(lines) != 2 { + t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + } +} + +func TestLF(t *testing.T) { + testFile := bytes.NewBufferString("a,b,c\n1,2,3\n").Bytes() + + r := csv.NewReader(New(bytes.NewReader(testFile))) + lines, err := r.ReadAll() + + if err != nil { + t.Errorf("An error occurred while reading the data: %v", err) + } + if len(lines) != 2 { + t.Error("Wrong number of lines.
Expected 2, got %d", len(lines)) + } +} + +func TestCRLF(t *testing.T) { + testFile := bytes.NewBufferString("a,b,c\r\n1,2,3\r\n").Bytes() + + r := csv.NewReader(New(bytes.NewReader(testFile))) + lines, err := r.ReadAll() + + if err != nil { + t.Errorf("An error occurred while reading the data: %v", err) + } + if len(lines) != 2 { + t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + } +} + +func TestCRInQuote(t *testing.T) { + testFile := bytes.NewBufferString("a,\"foo,\rbar\",c\r1,\"2\r\n2\",3\r").Bytes() + + r := csv.NewReader(New(bytes.NewReader(testFile))) + lines, err := r.ReadAll() + + if err != nil { + t.Errorf("An error occurred while reading the data: %v", err) + } + if len(lines) != 2 { + t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + } +} \ No newline at end of file From ccd3568111db2f0be7f7e1f52e6093d21ab77749 Mon Sep 17 00:00:00 2001 From: Kyle Derkacz Date: Fri, 4 Mar 2016 22:16:47 -0800 Subject: [PATCH 2/3] Adding a check to make sure that the quoted CRLF didn't become a LFLF --- reader_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/reader_test.go b/reader_test.go index fea0c88..d4acb36 100644 --- a/reader_test.go +++ b/reader_test.go @@ -5,6 +5,7 @@ import ( "bytes" "encoding/csv" "fmt" + "strings" ) func Example() { @@ -89,4 +90,7 @@ func TestCRInQuote(t *testing.T) { if len(lines) != 2 { t.Error("Wrong number of lines. 
Expected 2, got %d", len(lines)) } + if strings.Contains(lines[1][1], "\n\n") { + t.Error("The CRLF was converted to a LFLF") + } } \ No newline at end of file From af16e59bdded4ec141e4ae5e49c34f0bbaa21d82 Mon Sep 17 00:00:00 2001 From: Kyle Derkacz Date: Fri, 4 Mar 2016 23:59:40 -0800 Subject: [PATCH 3/3] Fixing the case where the \r happens to be the last byte in the buffer --- reader.go | 2 +- reader_test.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reader.go b/reader.go index fc460e1..47801c3 100644 --- a/reader.go +++ b/reader.go @@ -27,7 +27,7 @@ func New(r io.Reader) io.Reader { func (r reader) Read(p []byte) (n int, err error) { n, err = r.r.Read(p) for i, b := range p { - if b == rByte && i < len(p) && p[i+1] != nByte { + if j := i + 1; b == rByte && ((j < len(p) && p[j] != nByte) || j == len(p)) { p[i] = nByte } } diff --git a/reader_test.go b/reader_test.go index d4acb36..c50f5e5 100644 --- a/reader_test.go +++ b/reader_test.go @@ -46,7 +46,7 @@ func TestCR(t *testing.T) { t.Errorf("An error occurred while reading the data: %v", err) } if len(lines) != 2 { - t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } @@ -60,7 +60,7 @@ func TestLF(t *testing.T) { t.Errorf("An error occurred while reading the data: %v", err) } if len(lines) != 2 { - t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } @@ -74,7 +74,7 @@ func TestCRLF(t *testing.T) { t.Errorf("An error occurred while reading the data: %v", err) } if len(lines) != 2 { - t.Error("Wrong number of lines. Expected 2, got %d", len(lines)) + t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } @@ -88,9 +88,9 @@ func TestCRInQuote(t *testing.T) { t.Errorf("An error occurred while reading the data: %v", err) } if len(lines) != 2 { - t.Error("Wrong number of lines.
Expected 2, got %d", len(lines)) + t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } if strings.Contains(lines[1][1], "\n\n") { t.Error("The CRLF was converted to a LFLF") } -} \ No newline at end of file +}