From b43d85c414e7ec1815ec7e941ec0a3785dca3d77 Mon Sep 17 00:00:00 2001
From: Pieter Colpaert
Date: Fri, 27 Jan 2017 11:08:45 +0100
Subject: [PATCH] Fix C2FO/fast-csv#28

---
 lib/parser/parser_stream.js | 19 +++++++++++++++++++
 test/assets/test28.csv      | 10 ++++++++++
 test/issues.test.js         | 19 ++++++++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 test/assets/test28.csv

diff --git a/lib/parser/parser_stream.js b/lib/parser/parser_stream.js
index 4325ade8..b93c1163 100644
--- a/lib/parser/parser_stream.js
+++ b/lib/parser/parser_stream.js
@@ -194,11 +194,30 @@ extended(ParserStream).extend({
         }
     },
 
+    /**
+     * Strips a leading byte order mark from decoded text.
+     *
+     * The buffer-to-string conversion turns the UTF-8 BOM bytes (EF BB BF)
+     * into the single code unit U+FEFF, so only that character is checked.
+     *
+     * @param {*} data decoded text; empty or non-string values pass through
+     * @returns {*} data without its leading U+FEFF, otherwise data unchanged
+     */
+    __removeBOM: function (data) {
+        // Compare against the numeric code unit with strict equality instead
+        // of relying on the string '0xFEFF' being coerced to a number by ==.
+        if (data && typeof data === 'string' && data.charCodeAt(0) === 0xFEFF) {
+            return data.slice(1);
+        }
+        return data;
+    },
+
     _transform: function (data, encoding, done) {
         var lines = this.lines,
             lineData = (lines + this.decoder.write(data)),
             self = this;
         if (lineData.length > 1) {
+            lineData = this.__removeBOM(lineData);
             this._parse(lineData, true, function (err, lineData) {
                 if (err) {
                     done(err);
diff --git a/test/assets/test28.csv b/test/assets/test28.csv
new file mode 100644
index 00000000..98d2651d
--- /dev/null
+++ b/test/assets/test28.csv
@@ -0,0 +1,10 @@
+first_name,last_name,email_address,address
+First1,Last1,email1@email.com,"1 Street St, State ST, 88888"
+First2,Last2,email2@email.com,"2 Street St, State ST, 88888"
+First3,Last3,email3@email.com,"3 Street St, State ST, 88888"
+First4,Last4,email4@email.com,"4 Street St, State ST, 88888"
+First5,Last5,email5@email.com,"5 Street St, State ST, 88888"
+First6,Last6,email6@email.com,"6 Street St, State ST, 88888"
+First7,Last7,email7@email.com,"7 Street St, State ST, 88888"
+First8,Last8,email8@email.com,"8 Street St, State ST, 88888"
+First9,Last9,email9@email.com,"9 Street St, State ST, 88888"
\ No newline at end of file
diff --git a/test/issues.test.js b/test/issues.test.js
index 6fe5394a..edb7f14e 100644
--- a/test/issues.test.js
+++ b/test/issues.test.js
@@ -283,4 +283,21 @@ it.describe("github issues", function (it) {
         });
     });
 
-});
\ No newline at end of file
+
+    it.describe("#131", function (it) {
+
+        it.should("parse a csv with a UTF-8 Byte Order Mark", function (next) {
+            var actual = [];
+            csv
+                .fromPath(path.resolve(__dirname, "./assets/test28.csv"), {headers: true})
+                .on("data", function (data, index) {
+                    actual.push(data);
+                }).
+                on("end", function (count) {
+                    assert.deepEqual(actual[0]["first_name"], "First1");
+                    assert.equal(count, actual.length);
+                    next();
+                });
+        });
+    });
+});