Skip to content

Commit

Permalink
feat(encoding/csv): sync parse (#2491)
Browse files Browse the repository at this point in the history
  • Loading branch information
timreichen authored Aug 13, 2022
1 parent f693ad6 commit 33afdfe
Show file tree
Hide file tree
Showing 3 changed files with 468 additions and 309 deletions.
166 changes: 26 additions & 140 deletions encoding/csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,18 @@
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.

// This module is browser compatible.

/** Port of the Go
* [encoding/csv](https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/)
* library.
*
* @module
*/

import { BufReader } from "../io/buffer.ts";
import { TextProtoReader } from "../textproto/mod.ts";
import { StringReader } from "../io/readers.ts";
import { assert } from "../_util/assert.ts";
import {
ERR_FIELD_COUNT,
ERR_INVALID_DELIM,
ParseError,
readRecord,
} from "./csv/_io.ts";
import type { LineReader, ReadOptions } from "./csv/_io.ts";
import type { ReadOptions } from "./csv/_io.ts";
import { Parser } from "./csv/_parser.ts";

export {
ERR_BARE_QUOTE,
Expand All @@ -38,111 +32,6 @@ export type {
StringifyOptions,
} from "./csv_stringify.ts";

/**
 * `LineReader` implementation backed by a `TextProtoReader`.
 *
 * Each line obtained from the underlying reader is post-processed so that a
 * trailing `\r` right before EOF is removed and a trailing `\r\n` is turned
 * into `\n`.
 */
class TextProtoLineReader implements LineReader {
  #tp: TextProtoReader;

  constructor(bufReader: BufReader) {
    this.#tp = new TextProtoReader(bufReader);
  }

  /**
   * Reads the next line from the wrapped reader.
   *
   * @returns the normalized line, or `null` when the wrapped reader returns `null`.
   */
  async readLine() {
    const raw = await this.#tp.readLine();
    if (raw === null) {
      return null;
    }

    let text: string = raw;

    // Backwards compatibility: a trailing "\r" directly before EOF is dropped.
    // The EOF probe is always performed first, regardless of the line content.
    const atEOF = await this.isEOF();
    if (atEOF && text.endsWith("\r")) {
      text = text.slice(0, -1);
    }

    // Collapse a trailing "\r\n" into a single "\n".
    if (text.endsWith("\r\n")) {
      text = text.slice(0, -2) + "\n";
    }

    return text;
  }

  /** Resolves to `true` when peeking zero bytes on the underlying reader yields `null`. */
  async isEOF() {
    const peeked = await this.#tp.r.peek(0);
    return peeked === null;
  }
}

/** Characters that may be used neither as the separator nor as the comment marker. */
const INVALID_RUNE = ["\r", "\n", '"'];

/**
 * Fills in default values on `opt` (in place) and validates its delimiters.
 *
 * A falsy `separator` becomes `","` and a falsy `trimLeadingSpace` becomes
 * `false`.
 *
 * @param opt read options to normalize and validate; mutated in place
 * @throws {Error} with message `ERR_INVALID_DELIM` when the separator or a
 * string comment marker is one of `INVALID_RUNE`, or when the separator and
 * the comment marker are equal.
 */
function chkOptions(opt: ReadOptions): void {
  if (!opt.separator) opt.separator = ",";
  if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;

  const isForbidden = (ch: string): boolean => INVALID_RUNE.includes(ch);

  if (isForbidden(opt.separator)) {
    throw new Error(ERR_INVALID_DELIM);
  }
  if (typeof opt.comment === "string" && isForbidden(opt.comment)) {
    throw new Error(ERR_INVALID_DELIM);
  }
  if (opt.separator === opt.comment) {
    throw new Error(ERR_INVALID_DELIM);
  }
}

/**
 * Reads every CSV record available from `reader` and collects them into a
 * matrix of strings (one inner array per record).
 *
 * Records with zero fields are left out of the result. When
 * `opt.fieldsPerRecord` is defined, the expected field count is fixed while
 * handling the first record: `0` means "use the first record's length",
 * any other value is used as given.
 *
 * @param reader provides the CSV data to parse
 * @param opt controls the parsing behavior; it is validated (and defaulted
 * in place) by `chkOptions` before reading starts
 * @returns the parsed records
 * @throws {ParseError} with `ERR_FIELD_COUNT` when a non-empty record does
 * not have the expected number of fields
 */
export async function readMatrix(
  reader: BufReader,
  opt: ReadOptions = {
    separator: ",",
    trimLeadingSpace: false,
    lazyQuotes: false,
  },
): Promise<string[][]> {
  chkOptions(opt);

  const lineReader = new TextProtoLineReader(reader);
  const rows: string[][] = [];
  let expectedFields: number | undefined;
  let recordCount = 0;

  for (;;) {
    const record = await readRecord(recordCount, lineReader, opt);
    if (record === null) {
      return rows;
    }
    recordCount += 1;

    // Only the very first record decides the expected field count.
    if (recordCount === 1 && opt.fieldsPerRecord !== undefined) {
      expectedFields = opt.fieldsPerRecord === 0
        ? record.length
        : opt.fieldsPerRecord;
    }

    if (record.length === 0) {
      continue;
    }
    if (expectedFields && expectedFields !== record.length) {
      throw new ParseError(recordCount, recordCount, null, ERR_FIELD_COUNT);
    }
    rows.push(record);
  }
}

/**
* Parse the CSV string/buffer with the options provided.
*
Expand Down Expand Up @@ -173,46 +62,43 @@ export interface ParseOptions extends ReadOptions {
/**
* Csv parse helper to manipulate data.
* Provides an auto/custom mapper for columns.
* @param input Input to parse. Can be a string or BufReader.
* @param input Input to parse.
* @param opt options of the parser.
* @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`.
* If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record<string, unknown>[]`.
*/
export async function parse(
input: string | BufReader,
): Promise<string[][]>;
export async function parse(
input: string | BufReader,
export function parse(
input: string,
): string[][];
export function parse(
input: string,
opt: Omit<ParseOptions, "columns" | "skipFirstRow">,
): Promise<string[][]>;
export async function parse(
input: string | BufReader,
): string[][];
export function parse(
input: string,
opt: Omit<ParseOptions, "columns"> & {
columns: string[] | ColumnOptions[];
},
): Promise<Record<string, unknown>[]>;
export async function parse(
input: string | BufReader,
): Record<string, unknown>[];
export function parse(
input: string,
opt: Omit<ParseOptions, "skipFirstRow"> & {
skipFirstRow: true;
},
): Promise<Record<string, unknown>[]>;
export async function parse(
input: string | BufReader,
): Record<string, unknown>[];
export function parse(
input: string,
opt: ParseOptions,
): Promise<string[][] | Record<string, unknown>[]>;
export async function parse(
input: string | BufReader,
): string[][] | Record<string, unknown>[];
export function parse(
input: string,
opt: ParseOptions = {
skipFirstRow: false,
},
): Promise<string[][] | Record<string, unknown>[]> {
let r: string[][];
if (input instanceof BufReader) {
r = await readMatrix(input, opt);
} else {
r = await readMatrix(new BufReader(new StringReader(input)), opt);
}
): string[][] | Record<string, unknown>[] {
const parser = new Parser(opt);
const r = parser.parse(input);

if (opt.skipFirstRow || opt.columns) {
let headers: ColumnOptions[] = [];
let i = 0;
Expand Down
Loading

0 comments on commit 33afdfe

Please sign in to comment.