From e711954ba2e7cd272f31a9ec7ee036a8c78a575c Mon Sep 17 00:00:00 2001 From: Chet Brittingham Date: Mon, 5 Aug 2019 13:13:20 -0500 Subject: [PATCH] Feature add maxRows for #275 --- docs/parsing.md | 458 ++++++++++++++++++------------------ src/parser/ParserOptions.ts | 13 +- src/parser/parser/Parser.ts | 7 +- 3 files changed, 245 insertions(+), 233 deletions(-) diff --git a/docs/parsing.md b/docs/parsing.md index 351753b3..3b94809d 100644 --- a/docs/parsing.md +++ b/docs/parsing.md @@ -1,127 +1,129 @@ # Parsing -* [Options](#parsing-options) -* [Events](#parsing-events) -* [Parsing Methods](#parsing-methods) - * [`csv.parse`](#csv-parse) - * [`csv.parseStream`](#csv-parse-stream) - * [`csv.parseFile`](#csv-parse-path) - * [`csv.parseString`](#csv-parse-string) -* [Examples](#examples) - * [Manual Write](#csv-parse-manual-write) - * [Alternate Delimiters](#csv-parse-alternate-delimiters) - * [First Row As Headers](#csv-parse-first-row-as-headers) - * [Custom Headers](#csv-parse-custom-headers) - * [Renaming Headers](#csv-parse-renaming-headers) - * [Skipping Columns](#csv-parse-skipping-columns) - * [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows) - * [Transforming Rows](#csv-parse-transforming) - * [Validating Rows](#csv-parse-validation) +- [Options](#parsing-options) +- [Events](#parsing-events) +- [Parsing Methods](#parsing-methods) + - [`csv.parse`](#csv-parse) + - [`csv.parseStream`](#csv-parse-stream) + - [`csv.parseFile`](#csv-parse-path) + - [`csv.parseString`](#csv-parse-string) +- [Examples](#examples) + - [Manual Write](#csv-parse-manual-write) + - [Alternate Delimiters](#csv-parse-alternate-delimiters) + - [First Row As Headers](#csv-parse-first-row-as-headers) + - [Custom Headers](#csv-parse-custom-headers) + - [Renaming Headers](#csv-parse-renaming-headers) + - [Skipping Columns](#csv-parse-skipping-columns) + - [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows) + - [Transforming Rows](#csv-parse-transforming) + - [Validating 
Rows](#csv-parse-validation) + ## Options -* `objectMode: {boolean} = true`: Ensure that `data` events have an object emitted rather than the stringified version set to false to have a stringified buffer. -* `delimiter: {string} = ','`: If your data uses an alternate delimiter such as `;` or `\t`. - * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimiter -* `quote: {string} = '"'`: The character to use to quote fields that contain a delimiter. If you set to `null` then all quoting will be ignored. - * `"first,name",last name` -* `escape: {string} = '"'`: The character to used tp escape quotes inside of a quoted field. - * `i.e`: `First,"Name"' => '"First,""Name"""` -* `headers: {boolean|string[]} = false`: - * If you want the first row to be treated as headers then set to `true` - * If there is not a headers row and you want to provide one then set to a `string[]` - * If you wish to discard the first row and use your own headers set to a `string[]` and set the `renameHeaders` option to `true` -* `renameHeaders: {boolean} = false`: If you want the first line of the file to be removed and replaced by the one provided in the `headers` option. - * **NOTE** This option should only be used if the `headers` option is a `string[]` -* `ignoreEmpty: {boolean} = false`: If you wish to ignore empty rows. - * **NOTE** this will discard columns that are all white space or delimiters. -* `comment: {string} = null`: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. `#`). -* `discardUnmappedColumns: {boolean} = false`: If you want to discard columns that do not map to a header. - * **NOTE** this is only valid in the case that there are headers and the number of fields parsed is greater than the number of header fields. 
-* `comment: -* `strictColumnHandling: {boolean} = false`: If you want to consider empty lines/lines with too few fields as invalid and emit a `data-invalid` event - * **NOTE** This option is only considered when `headers` are present. -* `trim: {boolean} = false`: Set to `true` to trim all fields -* `rtrim: {boolean} = false`: Set to `true` to right trim all fields. -* `ltrim: {boolean} = false`: Set to `true` to left trim all fields. -* `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded. +- `objectMode: {boolean} = true`: Ensure that `data` events have an object emitted rather than the stringified version; set to `false` to have a stringified buffer. +- `delimiter: {string} = ','`: If your data uses an alternate delimiter such as `;` or `\t`. + - **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimiter +- `quote: {string} = '"'`: The character to use to quote fields that contain a delimiter. If you set to `null` then all quoting will be ignored. + - `"first,name",last name` +- `escape: {string} = '"'`: The character used to escape quotes inside of a quoted field. + - `i.e`: `First,"Name"' => '"First,""Name"""` +- `headers: {boolean|string[]} = false`: + - If you want the first row to be treated as headers then set to `true` + - If there is not a headers row and you want to provide one then set to a `string[]` + - If you wish to discard the first row and use your own headers set to a `string[]` and set the `renameHeaders` option to `true` +- `renameHeaders: {boolean} = false`: If you want the first line of the file to be removed and replaced by the one provided in the `headers` option. + - **NOTE** This option should only be used if the `headers` option is a `string[]` +- `ignoreEmpty: {boolean} = false`: If you wish to ignore empty rows. 
+ - **NOTE** this will discard columns that are all white space or delimiters. +- `comment: {string} = null`: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. `#`). +- `discardUnmappedColumns: {boolean} = false`: If you want to discard columns that do not map to a header. + - **NOTE** this is only valid in the case that there are headers and the number of fields parsed is greater than the number of header fields. +- `comment: +- `strictColumnHandling: {boolean} = false`: If you want to consider empty lines/lines with too few fields as invalid and emit a `data-invalid` event + - **NOTE** This option is only considered when `headers` are present. +- `trim: {boolean} = false`: Set to `true` to trim all fields +- `rtrim: {boolean} = false`: Set to `true` to right trim all fields. +- `ltrim: {boolean} = false`: Set to `true` to left trim all fields. +- `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded. +- `maxRows: {number}`: Up to the given number of rows will be returned if set to a number greater than 0 (e.g., `100` would return the first 100 rows of data). + ## Events -* `data`: Emitted when a record is parsed. - * If headers are present then all rows will be an object. - * If headers are not present then all rows will be an array. - * **NOTE** if `objectMode` is set to false then all rows will be a buffer with a JSON row. -* `data-invalid`: Emitted if there was invalid row encounted; - * Emitted when a `validate` function is provided and an invalid row is encountered. - * Emitted when `strictColumnHandling` is `true` and a row with a different number of fields than headers is encountered. +- `data`: Emitted when a record is parsed. + - If headers are present then all rows will be an object. 
+ - If headers are not present then all rows will be an array. + - **NOTE** if `objectMode` is set to false then all rows will be a buffer with a JSON row. +- `data-invalid`: Emitted if an invalid row was encountered: + - Emitted when a `validate` function is provided and an invalid row is encountered. + - Emitted when `strictColumnHandling` is `true` and a row with a different number of fields than headers is encountered. + ## Methods **`csv.parse([options]): CsvParserStream`** -Creates a Csv Parsing Stream that can be piped or written to. +Creates a Csv Parsing Stream that can be piped or written to. This is the main entrypoint and is used by all the other parsing helpers. ```javascript - -//creates a stream you can pipe -const stream = csv.parse() +//creates a stream you can pipe +const stream = csv.parse(); stream - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` To pipe to the stream from a file you can do the following. 
```javascript -const csv = require('fast-csv'); +const csv = require("fast-csv"); -fs.createReadStream('my.csv') - .pipe(csv.parse()) - .on('error', error => console.error(error)) - .on('data', row => console.log(`ROW=${JSON.stringify(row)}`)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); +fs.createReadStream("my.csv") + .pipe(csv.parse()) + .on("error", error => console.error(error)) + .on("data", row => console.log(`ROW=${JSON.stringify(row)}`)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` ```javascript -const csv = require('fast-csv'); +const csv = require("fast-csv"); const fileStream = fs.createReadStream("my.csv"); const parser = csv.parse(); fileStream - .pipe(parser) - .on('error', error => console.error(error)) - .on('readable', () => { - for (let row = parser.read(); row; row = parser.read()) { - console.log(`ROW=${JSON.stringify(row)}`); - } - }) - .on('end', (rowCount) => console.log(`Parsed ${rowCount} rows`)); + .pipe(parser) + .on("error", error => console.error(error)) + .on("readable", () => { + for (let row = parser.read(); row; row = parser.read()) { + console.log(`ROW=${JSON.stringify(row)}`); + } + }) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` **`csv.parseStream(readableStream[, options]): CsvParserStream`** -Accepts a readable stream and pipes it to a `CsvParserStream`. +Accepts a readable stream and pipes it to a `CsvParserStream`. 
```javascript -const stream = fs.createReadStream('./path/to/my.csv'); +const stream = fs.createReadStream("./path/to/my.csv"); csv .parseStream(stream) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); - + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` @@ -130,13 +132,13 @@ csv Parses a file from the specified path and returns the `CsvParserStream`. ```javascript -const csv = require('fast-csv'); +const csv = require("fast-csv"); csv - .parseFile('./path/to/my.csv') - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parseFile("./path/to/my.csv") + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` @@ -145,47 +147,44 @@ csv This method parses a string and returns the `CsvParserStream`. ```javascript -const { EOL } = require('os'); -const csv = require('fast-csv'); +const { EOL } = require("os"); +const csv = require("fast-csv"); -const CSV_STRING = [ - 'a,b', - 'a1,b1', - 'a2,b2', -].join(EOL); +const CSV_STRING = ["a,b", "a1,b1", "a2,b2"].join(EOL); csv - .fromString(CSV_STRING, { headers: true }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parseString(CSV_STRING, { headers: true }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); ``` + ## Examples All of the examples below can be found in [examples/parsing](../examples/parsing) directory. 
- + ### Manual Write [`examples/parsing/manual_write.example.js`](../examples/parsing/manual_write.examples.js) ```javascript -const csv = require('fast-csv'); +const csv = require("fast-csv"); -const stream = csv.parse({ headers: true }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); +const stream = csv + .parse({ headers: true }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); -stream.write('header1,header2\n'); -stream.write('col1,col2'); +stream.write("header1,header2\n"); +stream.write("col1,col2"); stream.end(); - ``` Expected output @@ -196,6 +195,7 @@ Parsed 1 rows ``` + ### Alternate Delimiter You can provide a `delimiter` option to change the delimiter from a `,` character. @@ -203,16 +203,13 @@ You can provide a `delimiter` option to change the delimiter from a `,` characte [`examples/parsing/alternate_delimiter.example.js`](../examples/parsing/alternate_delimiter.examples.js) ```javascript -const CSV_STRING = [ - 'a1\tb1', - 'a2\tb2', -].join(EOL); +const CSV_STRING = ["a1\tb1", "a2\tb2"].join(EOL); const stream = csv - .parse({ delimiter: '\t' }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ delimiter: "\t" }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -227,34 +224,32 @@ Parsed 2 rows ``` + ### First Row As Headers -If you expect the first line your CSV to be headers you may pass in a `headers` option. +If you expect the first line your CSV to be headers you may pass in a `headers` option. 
Setting the `headers` option to `true` will cause change each row to an object rather than an array. [`examples/parsing/first_row_as_headers.example.js`](../examples/parsing/first_row_as_headers.example.js) ```javascript -const { EOL } = require('os'); +const { EOL } = require("os"); -const CSV_STRING = [ - 'a,b', - 'a1,b1', - 'a2,b2', -].join(EOL); +const CSV_STRING = ["a,b", "a1,b1", "a2,b2"].join(EOL); const stream = csv .parse({ headers: true }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); ``` Expected output + ``` { a: 'a1', b: 'b1' } { a: 'a2', b: 'b2' } @@ -262,6 +257,7 @@ Parsed 2 rows ``` + ### Custom Headers You may alternatively pass an array of header names. @@ -271,22 +267,18 @@ You may alternatively pass an array of header names. [`examples/parsing/custom_headers.example.js`](../examples/parsing/custom_headers.example.js) ```javascript -const { EOL } = require('os'); +const { EOL } = require("os"); -const CSV_STRING = [ - 'a1,b1', - 'a2,b2', -].join(EOL); +const CSV_STRING = ["a1,b1", "a2,b2"].join(EOL); const stream = csv - .parse({ headers: [ 'a', 'b' ] }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: ["a", "b"] }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); - ``` Expected output @@ -298,6 +290,7 @@ Parsed 2 rows ``` + ### Renaming Headers If the CSV contains a header row but you want to provide custom headers you can pass an array of headers, and set `renameHeaders` to true. 
@@ -305,23 +298,18 @@ If the CSV contains a header row but you want to provide custom headers you can [`examples/parsing/rename_headers.example.js`](../examples/parsing/rename_headers.example.js) ```javascript -const { EOL } = require('os'); +const { EOL } = require("os"); -const CSV_STRING = [ - 'header1,header2', - 'a1,b1', - 'a2,b2', -].join(EOL); +const CSV_STRING = ["header1,header2", "a1,b1", "a2,b2"].join(EOL); const stream = csv - .parse({ headers: [ 'a', 'b' ], renameHeaders: true }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: ["a", "b"], renameHeaders: true }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); - ``` Expected output @@ -333,6 +321,7 @@ Parsed 2 rows ``` + ### Skipping Columns To omit some of the data columns you may not need, pass a sparse array as `headers`. 
@@ -340,20 +329,16 @@ To omit some of the data columns you may not need, pass a sparse array as `heade [`examples/parsing/skipping_columns.example.js`](../examples/parsing/skipping_columns.example.js) ```javascript -const CSV_STRING = [ - 'a1,b1,c1', - 'a2,b2,c2', -].join(EOL); +const CSV_STRING = ["a1,b1,c1", "a2,b2,c2"].join(EOL); const stream = csv - .parse({ headers: [ 'a', undefined, 'c' ] }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: ["a", undefined, "c"] }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); - ``` Expected output @@ -365,6 +350,7 @@ Parsed 2 rows ``` + ### Ignoring Empty Rows If your data includes empty rows, the sort Excel might include at the end of the file for instance, you can ignore these by including the `ignoreEmpty` option. 
@@ -375,22 +361,21 @@ Any rows consisting of nothing but empty strings and/or commas will be skipped, ```javascript const CSV_STRING = [ - 'a1,b1', - ',', //empty row empty colums - 'a2,b2', - ' ,\t', //empty row columns with just white space - '', //empty last line + "a1,b1", + ",", //empty row empty colums + "a2,b2", + " ,\t", //empty row columns with just white space + "" //empty last line ].join(EOL); const stream = csv - .parse({ ignoreEmpty: true }) - .on('error', error => console.error(error)) - .on('data', row => console.log(row)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ ignoreEmpty: true }) + .on("error", error => console.error(error)) + .on("data", row => console.log(row)) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); - ``` Expected output @@ -402,6 +387,7 @@ Parsed 2 rows ``` + ### Transforming You can transform data by providing a transform function. What is returned from the transform function will be provided to validate and emitted as a row. @@ -410,22 +396,22 @@ You can transform data by providing a transform function. 
What is returned from ```javascript const CSV_STRING = [ - 'firstName,lastName', - 'bob,yukon', - 'sally,yukon', - 'timmy,yukon', + "firstName,lastName", + "bob,yukon", + "sally,yukon", + "timmy,yukon" ].join(EOL); const stream = csv - .parse({ headers: true }) - .transform(data => ({ - firstName: data.firstName.toUpperCase(), - lastName: data.lastName.toUpperCase(), - properName: `${data.firstName} ${data.lastName}`, - })) - .on('error', error => console.error(error)) - .on('data', row => console.log(JSON.stringify(row))) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: true }) + .transform(data => ({ + firstName: data.firstName.toUpperCase(), + lastName: data.lastName.toUpperCase(), + properName: `${data.firstName} ${data.lastName}` + })) + .on("error", error => console.error(error)) + .on("data", row => console.log(JSON.stringify(row))) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -446,24 +432,26 @@ Parsed 3 rows ```javascript const CSV_STRING = [ - 'firstName,lastName', - 'bob,yukon', - 'sally,yukon', - 'timmy,yukon', + "firstName,lastName", + "bob,yukon", + "sally,yukon", + "timmy,yukon" ].join(EOL); const stream = csv - .parse({ headers: true }) - .transform((data, cb) => { - setImmediate(() => cb(null, { - firstName: data.firstName.toUpperCase(), - lastName: data.lastName.toUpperCase(), - properName: `${data.firstName} ${data.lastName}`, - })); - }) - .on('error', error => console.error(error)) - .on('data', row => console.log(JSON.stringify(row))) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: true }) + .transform((data, cb) => { + setImmediate(() => + cb(null, { + firstName: data.firstName.toUpperCase(), + lastName: data.lastName.toUpperCase(), + properName: `${data.firstName} ${data.lastName}` + }) + ); + }) + .on("error", error => console.error(error)) + .on("data", row => console.log(JSON.stringify(row))) + 
.on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -479,6 +467,7 @@ Parsed 3 rows ``` + ### Validation You can validate each row in the CSV by providing a validate handler. If a row is invalid then a `data-invalid` event will be emitted with the row and the index. @@ -487,19 +476,21 @@ You can validate each row in the CSV by providing a validate handler. If a row i ```javascript const CSV_STRING = [ - 'firstName,lastName', - 'bob,yukon', - 'sally,yukon', - 'timmy,yukon', + "firstName,lastName", + "bob,yukon", + "sally,yukon", + "timmy,yukon" ].join(EOL); const stream = csv - .parse({ headers: true }) - .validate(data => data.firstName !== 'bob') - .on('error', error => console.error(error)) - .on('data', row => console.log(`Valid [row=${JSON.stringify(row)}]`)) - .on('data-invalid', (row, rowNumber) => console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + .parse({ headers: true }) + .validate(data => data.firstName !== "bob") + .on("error", error => console.error(error)) + .on("data", row => console.log(`Valid [row=${JSON.stringify(row)}]`)) + .on("data-invalid", (row, rowNumber) => + console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`) + ) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -517,22 +508,26 @@ Parsed 2 rows `fast-csv` also supports async validation, with a callback. 
[`examples/parsing/validate_async.example.js`](../examples/parsing/validate_async.example.js) + ```javascript const CSV_STRING = [ - 'firstName,lastName', - 'bob,yukon', - 'sally,yukon', - 'timmy,yukon', + "firstName,lastName", + "bob,yukon", + "sally,yukon", + "timmy,yukon" ].join(EOL); -const stream = csv.parse({ headers: true }) - .validate((row, cb) => { - setImmediate(() => cb(null, row.firstName !== 'bob')); - }) - .on('error', error => console.error(error)) - .on('data', row => console.log(`Valid [row=${JSON.stringify(row)}]`)) - .on('data-invalid', (row, rowNumber) => console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`)) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); +const stream = csv + .parse({ headers: true }) + .validate((row, cb) => { + setImmediate(() => cb(null, row.firstName !== "bob")); + }) + .on("error", error => console.error(error)) + .on("data", row => console.log(`Valid [row=${JSON.stringify(row)}]`)) + .on("data-invalid", (row, rowNumber) => + console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`) + ) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -550,28 +545,34 @@ Parsed 2 rows Sometimes you may wish to provide a reason that the row was invalid, you can use the callback to provide additional info. 
[`examples/parsing/validate_with_reason.example.js`](../examples/parsing/validate_with_reason.example.js) + ```javascript const CSV_STRING = [ - 'firstName,lastName', - 'bob,yukon', - 'sally,yukon', - 'timmy,yukon', + "firstName,lastName", + "bob,yukon", + "sally,yukon", + "timmy,yukon" ].join(EOL); -const stream = csv.parse({ headers: true }) - .validate((row, cb) => { - const isValid = row.firstName !== 'bob'; - if (!isValid) { - return cb(null, false, 'Name is bob'); - } - return cb(null, true); - }) - .on('error', error => console.error(error)) - .on('data', row => console.log(`Valid [row=${JSON.stringify(row)}]`)) - .on('data-invalid', (row, rowNumber, reason) => { - console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}] [reason=${reason}]`); - }) - .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); +const stream = csv + .parse({ headers: true }) + .validate((row, cb) => { + const isValid = row.firstName !== "bob"; + if (!isValid) { + return cb(null, false, "Name is bob"); + } + return cb(null, true); + }) + .on("error", error => console.error(error)) + .on("data", row => console.log(`Valid [row=${JSON.stringify(row)}]`)) + .on("data-invalid", (row, rowNumber, reason) => { + console.log( + `Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify( + row + )}] [reason=${reason}]` + ); + }) + .on("end", rowCount => console.log(`Parsed ${rowCount} rows`)); stream.write(CSV_STRING); stream.end(); @@ -585,4 +586,3 @@ Valid [row={"firstName":"sally","lastName":"yukon"}] Valid [row={"firstName":"timmy","lastName":"yukon"}] Parsed 2 rows ``` - diff --git a/src/parser/ParserOptions.ts b/src/parser/ParserOptions.ts index 350a24d8..974a8d38 100644 --- a/src/parser/ParserOptions.ts +++ b/src/parser/ParserOptions.ts @@ -1,6 +1,6 @@ import { escapeRegExp, isString, isNil } from 'lodash'; -export interface ParserOptionsArgs{ +export interface ParserOptionsArgs { objectMode?: boolean; delimiter?: string; quote?: string | null; @@ -15,6 +15,7 @@ 
export interface ParserOptionsArgs{ ltrim?: boolean; rtrim?: boolean; encoding?: string; + maxRows?: number; } export class ParserOptions { @@ -36,7 +37,7 @@ export class ParserOptions { public readonly supportsComments: boolean = false; - public readonly ltrim: boolean = false ; + public readonly ltrim: boolean = false; public readonly rtrim: boolean = false; @@ -56,6 +57,10 @@ export class ParserOptions { public readonly encoding: string = 'utf8'; + public readonly limitRows: boolean = false; + + public readonly maxRows: number = -1; + public constructor(opts?: ParserOptionsArgs) { Object.assign(this, opts || {}); if (this.delimiter.length > 1) { @@ -65,5 +70,9 @@ export class ParserOptions { this.escapeChar = isString(this.escape) ? this.escape : this.quote; this.supportsComments = !isNil(this.comment); this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`); + + if (this.maxRows > 0) { + this.limitRows = true; + } } } diff --git a/src/parser/parser/Parser.ts b/src/parser/parser/Parser.ts index d00189b9..219cf1a7 100644 --- a/src/parser/parser/Parser.ts +++ b/src/parser/parser/Parser.ts @@ -13,7 +13,7 @@ export default class Parser { private static removeBOM(line: string): string { // Catches EFBBBF (UTF-8 BOM) because the buffer-to-string // conversion translates it to FEFF (UTF-16 BOM) - if (line && line.charCodeAt(0) === 0xFEFF) { + if (line && line.charCodeAt(0) === 0xfeff) { return line.slice(1); } return line; @@ -69,9 +69,11 @@ export default class Parser { return { line: scanner.line, rows }; } + private rowCount: number = 0; + private parseRow(scanner: Scanner, rows: RowArray[]): boolean { const nextToken = scanner.nextNonSpaceToken; - if (!nextToken) { + if (!nextToken || (this.parserOptions.limitRows && this.rowCount >= this.parserOptions.maxRows)) { return false; } const row = this.rowParser.parse(scanner); @@ -82,6 +84,7 @@ export default class Parser { return true; } rows.push(row); + this.rowCount += 1; return 
true; } }