Skip to content

Commit

Permalink
Merge pull request #294 from C2FO/pr277
Browse files Browse the repository at this point in the history
Pr277
  • Loading branch information
doug-martin authored Dec 15, 2019
2 parents b0d60ac + 06eab1b commit f49f541
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 16 deletions.
4 changes: 4 additions & 0 deletions History.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# v3.5.1

* [ADDED] `maxRows` option to limit the number of rows parsed. [#275](https://github.com/C2FO/fast-csv/issues/275) [#277](https://github.com/C2FO/fast-csv/pull/277) - [@cbrittingham](https://github.com/cbrittingham)

# v3.5.0

* Upgraded dependencies
Expand Down
4 changes: 3 additions & 1 deletion benchmark/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// ESLint overrides that apply to the benchmark scripts in this directory.
module.exports = {
parserOptions: {
// NOTE(review): presumably disables the TypeScript project lookup for these plain JS files — confirm.
project: null,
},
rules: {
// Severity 0 switches a rule off: the benchmark prints its timings with console.log ...
"no-console": 0,
// ... and loads its modules with require().
"@typescript-eslint/no-var-requires": 0
},
};
20 changes: 10 additions & 10 deletions benchmark/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ const path = require('path');
const fs = require('fs');
const fastCsv = require('..');


/**
 * Converts a snake_case string to camelCase.
 *
 * Every underscore that is followed by a character is removed and that
 * character is upper-cased; an underscore with nothing after it is kept.
 *
 * @param {string} str - the string to convert.
 * @returns {string} the converted string.
 */
function camelize(str) {
    const upperCaseFollowingChar = (_match, following) => following.toUpperCase();
    return str.replace(/_(.)/g, upperCaseFollowingChar);
}
Expand All @@ -11,7 +10,7 @@ const promisfyStream = (stream, expectedRows) => {
let count = 0;
return new Promise((res, rej) => {
stream
.on('data', (row) => {
.on('data', row => {
count += 1;
})
.on('end', () => {
Expand All @@ -25,13 +24,14 @@ const promisfyStream = (stream, expectedRows) => {
});
};

const benchmarkFastCsv = type => (num) => {
const benchmarkFastCsv = type => num => {
const file = path.resolve(__dirname, `./assets/${num}.${type}.csv`);
const stream = fs.createReadStream(file)
.pipe(fastCsv.parse({ headers: true }))
.transform((data) => {
const stream = fs
.createReadStream(file)
.pipe(fastCsv.parse({ headers: true, maxRows: 10 }))
.transform(data => {
const ret = {};
[ 'first_name', 'last_name', 'email_address' ].forEach((prop) => {
['first_name', 'last_name', 'email_address'].forEach(prop => {
ret[camelize(prop)] = data[prop];
});
ret.address = data.address;
Expand All @@ -47,15 +47,15 @@ async function benchmarkRun(title, num, m) {
for (let i = 0; i < howMany; i += 1) {
// eslint-disable-next-line no-await-in-loop
await m(num);
console.log('%s: RUN(%d lines) 1 %dms', title, num, (new Date() - runStart));
console.log('%s: RUN(%d lines) 1 %dms', title, num, new Date() - runStart);
runStart = new Date();
}
console.log('%s: 3xAVG for %d lines %dms', title, num, (new Date() - start) / howMany);
}

function runBenchmarks(num, type) {
console.log(`\nRUNNING ${num}.${type}.csv benchmarks`, num);
return benchmarkRun('fast-csv', num, benchmarkFastCsv(type))
return benchmarkRun('fast-csv', num, benchmarkFastCsv(type));
}

function benchmarks(type) {
Expand All @@ -67,7 +67,7 @@ function benchmarks(type) {
benchmarks('nonquoted')
.then(() => benchmarks('quoted'))
.then(() => process.exit())
.catch((e) => {
.catch(e => {
console.error(e.stack);
return process.exit(1);
});
43 changes: 43 additions & 0 deletions docs/parsing.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows)
* [Transforming Rows](#csv-parse-transforming)
* [Validating Rows](#csv-parse-validation)
* [Max Rows](#max-rows)

<a name="parsing-options"></a>
## Options
Expand Down Expand Up @@ -45,6 +46,7 @@
* `rtrim: {boolean} = false`: Set to `true` to right trim all fields.
* `ltrim: {boolean} = false`: Set to `true` to left trim all fields.
* `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded.
* `maxRows: {number} = 0`: If number is `> 0` then only the specified number of rows will be parsed (e.g. `100` would return the first 100 rows of data).

<a name="parsing-events"></a>
## Events
Expand Down Expand Up @@ -585,3 +587,44 @@ Valid [row={"firstName":"timmy","lastName":"yukon"}]
Parsed 2 rows
```

<a name="max-rows"></a>
[`examples/parsing/max_rows.example.js`](../examples/parsing/max_rows.example.js)

In the following example there are 10 rows, but only 5 will be parsed because of the `maxRows` option.

```javascript
const rows = [
'header1,header2\n',
'col1,col1\n',
'col2,col2\n',
'col3,col3\n',
'col4,col4\n',
'col5,col5\n',
'col6,col6\n',
'col7,col7\n',
'col8,col8\n',
'col9,col9\n',
'col10,col10',
];

const stream = csv
.parse({ headers: true, maxRows: 5 })
.on('error', error => console.error(error))
.on('data', row => console.log(row))
.on('end', rowCount => console.log(`Parsed ${rowCount} rows`));

rows.forEach(row => stream.write(row));
stream.end();
```

Expected output

```
{ header1: 'col1', header2: 'col1' }
{ header1: 'col2', header2: 'col2' }
{ header1: 'col3', header2: 'col3' }
{ header1: 'col4', header2: 'col4' }
{ header1: 'col5', header2: 'col5' }
Parsed 5 rows
```

24 changes: 24 additions & 0 deletions examples/parsing/max_rows.example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Example: cap the number of parsed rows with the `maxRows` option.
const csv = require('../../');

// One header line followed by ten data rows; each entry is written to the parser as its own chunk.
const rows = [
'header1,header2\n',
'col1,col1\n',
'col2,col2\n',
'col3,col3\n',
'col4,col4\n',
'col5,col5\n',
'col6,col6\n',
'col7,col7\n',
'col8,col8\n',
'col9,col9\n',
'col10,col10',
];

// With `maxRows: 5` only the first five data rows are parsed, even though ten are written below.
const stream = csv
.parse({ headers: true, maxRows: 5 })
.on('error', error => console.error(error))
.on('data', row => console.log(row))
// The 'end' handler logs the row count it is passed.
.on('end', rowCount => console.log(`Parsed ${rowCount} rows`));

rows.forEach(row => stream.write(row));
stream.end();
24 changes: 19 additions & 5 deletions src/parser/CsvParserStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ export default class CsvParserStream extends Transform {
this.rowTransformerValidator = new RowTransformerValidator();
}

/**
 * Whether row limiting is enabled and the number of rows counted so far
 * has reached the configured `maxRows`.
 */
private get hasHitRowLimit(): boolean {
    const { limitRows, maxRows } = this.parserOptions;
    return limitRows && this.rowCount >= maxRows;
}

public transform(transformFunction: RowTransformFunction): CsvParserStream {
this.rowTransformerValidator.rowTransform = transformFunction;
return this;
Expand All @@ -54,23 +58,31 @@ export default class CsvParserStream extends Transform {
}

public _transform(data: Buffer, encoding: string, done: TransformCallback): void {
// if we have hit our maxRows parsing limit then skip parsing
if (this.hasHitRowLimit) {
return done();
}
try {
const { lines } = this;
const newLine = lines + this.decoder.write(data);
const rows = this.parse(newLine, true);
this.processRows(rows, done);
return this.processRows(rows, done);
} catch (e) {
done(e);
return done(e);
}
}

public _flush(done: TransformCallback): void {
// if we have hit our maxRows parsing limit then skip parsing
if (this.hasHitRowLimit) {
return done();
}
try {
const newLine = this.lines + this.decoder.end();
const rows = this.parse(newLine, false);
this.processRows(rows, done);
return this.processRows(rows, done);
} catch (e) {
done(e);
return done(e);
}
}

Expand All @@ -86,7 +98,9 @@ export default class CsvParserStream extends Transform {
private processRows(rows: string[][], cb: TransformCallback): void {
const rowsLength = rows.length;
const iterate = (i: number): void => {
if (i >= rowsLength) {
// if we have emitted all rows or we have hit the maxRows limit option
// then end
if (i >= rowsLength || this.hasHitRowLimit) {
return cb();
}
const row = rows[i];
Expand Down
9 changes: 9 additions & 0 deletions src/parser/ParserOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export interface ParserOptionsArgs {
ltrim?: boolean;
rtrim?: boolean;
encoding?: string;
maxRows?: number;
}

export class ParserOptions {
Expand Down Expand Up @@ -57,6 +58,10 @@ export class ParserOptions {

public readonly encoding: string = 'utf8';

public readonly limitRows: boolean = false;

public readonly maxRows: number = 0;

public constructor(opts?: ParserOptionsArgs) {
Object.assign(this, opts || {});
if (this.delimiter.length > 1) {
Expand All @@ -66,5 +71,9 @@ export class ParserOptions {
this.escapeChar = this.escape ?? this.quote;
this.supportsComments = !isNil(this.comment);
this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`);

if (this.maxRows > 0) {
this.limitRows = true;
}
}
}
18 changes: 18 additions & 0 deletions test/parser/CsvParsingStream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ describe('CsvParserStream', () => {
});
});

describe('maxRows', () => {
    // Each test returns its promise so the test runner waits for the
    // assertions inside `.then` and reports their failures; a promise that
    // is not returned lets the test finish before the assertions run.
    it('should parse up to the specified number of maxRows', () => {
        const maxRows = 3;
        return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
            assert.deepStrictEqual(rows, assets.withHeaders.parsed.slice(0, maxRows));
            assert.strictEqual(count, maxRows);
        });
    });

    it('should parse all rows if maxRows === 0', () => {
        const maxRows = 0;
        return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => {
            assert.deepStrictEqual(rows, assets.withHeaders.parsed);
            assert.strictEqual(count, rows.length);
        });
    });
});

it('should emit an error for malformed rows', next => {
assets.write(assets.malformed);
const stream = csv.parseFile(assets.malformed.path, { headers: true });
Expand Down
20 changes: 20 additions & 0 deletions test/parser/ParserOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,24 @@ describe('ParserOptions', () => {
assert.strictEqual(createOptions({ renameHeaders: false }).renameHeaders, false);
});
});

// Covers how the `maxRows` option maps onto the derived `limitRows` flag:
// the flag is only switched on for a positive `maxRows`.
describe('#maxRows', () => {
    it('should default maxRows to 0 and limitRows to false', () => {
        const opts = createOptions();
        assert.strictEqual(opts.maxRows, 0);
        assert.strictEqual(opts.limitRows, false);
    });

    it('should set maxRows to the provided option and limitRows to true if maxRows > 0', () => {
        const opts = createOptions({ maxRows: 1 });
        assert.strictEqual(opts.maxRows, 1);
        assert.strictEqual(opts.limitRows, true);
    });

    // Title corrected: the assertions below expect limitRows to be false for maxRows === 0.
    it('should set maxRows to the provided option and limitRows to false if maxRows === 0', () => {
        const opts = createOptions({ maxRows: 0 });
        assert.strictEqual(opts.maxRows, 0);
        assert.strictEqual(opts.limitRows, false);
    });
});
});

0 comments on commit f49f541

Please sign in to comment.