diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 10ca6b7f..0e12497a 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -4,23 +4,23 @@ on: [push] jobs: build: - strategy: + fail-fast: false matrix: os: [ubuntu-latest] - node-version: [12.x, 14.x] + node-version: [12.x, 14.x, 16.x] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v1 - with: - node-version: ${{ matrix.node-version }} - - name: yarn install and test - run: | - yarn install --frozen-lockfile - yarn test - env: - CI: true + - uses: actions/checkout@v1 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v1 + with: + node-version: ${{ matrix.node-version }} + - name: yarn install and test + run: | + yarn install --frozen-lockfile + yarn test + env: + CI: true diff --git a/README.md b/README.md index add80855..ae8b1eea 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,15 @@ Returns a promise to the file loaded as a [SQLite database client](https://obser const db = await FileAttachment("chinook.db").sqlite(); ``` +# *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source") + +Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx). + +```js +const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx(); +const sheet = workbook.sheet("Sheet1", {range: "B4:AF234", headers: true}); +``` + # *attachment*.xml() [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source") Returns a promise to an [XMLDocument](https://developer.mozilla.org/en-US/docs/Web/API/XMLDocument) containing the contents of the file. 
diff --git a/package.json b/package.json index f7de5be5..e92ee190 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "url": "https://github.com/observablehq/stdlib.git" }, "scripts": { - "test": "tap 'test/**/*-test.js'", + "test": "tap 'test/**/*-test.js' --reporter classic", "prepublishOnly": "rollup -c", "postpublish": "git push && git push --tags" }, diff --git a/src/dependencies.js b/src/dependencies.js index ae845fc3..e721f673 100644 --- a/src/dependencies.js +++ b/src/dependencies.js @@ -16,3 +16,4 @@ export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js"); export const arquero = dependency("arquero", "4.8.4", "dist/arquero.min.js"); export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js"); +export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js"); diff --git a/src/fileAttachment.js b/src/fileAttachment.js index ac2c4d44..90d0d03e 100644 --- a/src/fileAttachment.js +++ b/src/fileAttachment.js @@ -1,7 +1,8 @@ import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv"; import {require as requireDefault} from "d3-require"; -import {arrow, jszip} from "./dependencies.js"; +import {arrow, jszip, exceljs} from "./dependencies.js"; import {SQLiteDatabaseClient} from "./sqlite.js"; +import {Workbook} from "./xlsx.js"; async function remote_fetch(file) { const response = await fetch(await file.url()); @@ -70,6 +71,10 @@ class AbstractFile { async html() { return this.xml("text/html"); } + async xlsx() { + const [ExcelJS, buffer] = await Promise.all([requireDefault(exceljs.resolve()), this.arrayBuffer()]); + return new Workbook(await new ExcelJS.Workbook().xlsx.load(buffer)); + } } class FileAttachment extends AbstractFile { diff --git a/src/xlsx.js b/src/xlsx.js new file mode 100644 index 00000000..86203268 --- /dev/null +++ b/src/xlsx.js @@ -0,0 +1,104 @@ +export class 
// A read-only view over a parsed workbook object. The wrapped object must
// expose `worksheets` (each with a `name`) and `getWorksheet(name)`; each
// worksheet is read through `_rows`, `rowCount` and `columnCount`, and each
// row through `_cells` and `hasValues`.
export class Workbook {
  // Stores the underlying workbook on the non-enumerable property `_` and
  // exposes the worksheet names, in workbook order, as `sheetNames`.
  constructor(workbook) {
    Object.defineProperties(this, {
      _: {value: workbook},
      sheetNames: {
        value: workbook.worksheets.map((s) => s.name),
        enumerable: true,
      },
    });
  }

  // Returns the rows of a sheet as an array of objects keyed by column name,
  // with the column names (preceded by "#") attached as `columns`.
  //
  // name    - a zero-based index into `sheetNames` (when a number), otherwise
  //           a sheet name (coerced to a string).
  // options - optional {range, headers}:
  //           range   - "A1"-style specifier such as "B2:C3", "H:", ":2";
  //                     either side of the colon may be omitted.
  //           headers - when truthy, the first row of the range provides the
  //                     column names instead of the letters A, B, C, ….
  //
  // Throws an Error if the sheet does not exist or the range is malformed.
  sheet(name, options) {
    const sname =
      typeof name === "number"
        ? this.sheetNames[name]
        : this.sheetNames.includes((name += ""))
        ? name
        : null;
    if (sname == null) throw new Error(`Sheet not found: ${name}`);
    return extract(this._.getWorksheet(sname), options);
  }
}

// Reads the requested range of a worksheet into an array of row objects.
// Each row carries its one-based sheet row number on the non-enumerable
// property "#"; cells whose value is null or undefined are omitted.
function extract(sheet, {range, headers = false} = {}) {
  let [[c0, r0], [c1, r1]] = parseRange(range, sheet);
  // When headers are requested, the first row of the range is consumed as
  // the header row and the data starts on the following row.
  const headerRow = headers ? sheet._rows[r0++] : null;

  // Assign a unique name to every column of the range. "#" is reserved for
  // the row number, so it is marked as taken up front.
  const taken = new Set(["#"]);
  const names = new Map(); // zero-based column index -> column name
  for (let c = c0; c <= c1; c++) {
    const header = headerRow ? valueOf(headerRow._cells[c]) : null;
    // Coerce to a string before checking uniqueness: names end up as object
    // keys, where the number 1 and the string "1" would otherwise collide.
    let name = (header && header + "") || toColumn(c);
    while (taken.has(name)) name += "_";
    taken.add(name);
    names.set(c, name);
  }

  // Clamp the row count: the range may be empty (for instance an empty sheet
  // whose only candidate row was consumed as the header) or inverted, and a
  // negative array length would throw a RangeError.
  const length = Math.max(0, r1 - r0 + 1);
  const output = Array.from({length}, (_, i) => {
    const r = r0 + i;
    const row = Object.defineProperty({}, "#", {value: r + 1});
    const source = sheet._rows[r];
    if (source && source.hasValues) {
      for (let c = c0; c <= c1; c++) {
        const value = valueOf(source._cells[c]);
        if (value != null) row[names.get(c)] = value;
      }
    }
    return row;
  });

  output.columns = Array.from(taken); // "#" first, then columns in order
  return output;
}

// Concatenates the text of every run of a rich text value.
function plainText({richText}) {
  return richText.map((d) => d.text).join("");
}

// Returns the plain JavaScript value of a cell, or undefined for a missing
// cell. Primitives and dates are returned as-is; structured values are
// flattened as follows:
// - formulas yield their result, or NaN if the result is an error;
// - rich text yields the concatenated text of its runs;
// - hyperlinks yield their text, prefixed by the link target and a space
//   when the target differs from the text;
// - any other object is returned unchanged.
function valueOf(cell) {
  if (!cell) return;
  const {value} = cell;
  if (!value || typeof value !== "object" || value instanceof Date) {
    return value;
  }
  if (value.formula || value.sharedFormula) {
    return value.result && value.result.error ? NaN : value.result;
  }
  if (value.richText) return plainText(value);
  if (value.text) {
    const text = value.text.richText ? plainText(value.text) : value.text;
    return value.hyperlink && value.hyperlink !== text
      ? `${value.hyperlink} ${text}`
      : text;
  }
  return value;
}

// Parses a range specifier such as "B2:C3" into zero-based, inclusive
// [[c0, r0], [c1, r1]] bounds. Omitted parts default to the extent of the
// sheet as given by columnCount and rowCount. Column letters must be
// uppercase. Throws an Error if the specifier is malformed.
function parseRange(specifier = ":", {columnCount, rowCount}) {
  specifier += "";
  if (!specifier.match(/^[A-Z]*\d*:[A-Z]*\d*$/))
    throw new Error("Malformed range specifier");
  const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] =
    specifier.split(":").map(fromCellReference);
  return [
    [c0, r0],
    [c1, r1],
  ];
}

// Returns the default column name for a zero-based column index.
// For example: 0 -> "A", 1 -> "B", 25 -> "Z", 26 -> "AA", 27 -> "AB".
function toColumn(c) {
  let sc = "";
  c++;
  do {
    // Bijective base 26: a remainder of zero stands for "Z", not a digit 0.
    sc = String.fromCharCode(64 + (c % 26 || 26)) + sc;
  } while ((c = Math.floor((c - 1) / 26)));
  return sc;
}

// Returns the zero-based indexes from a cell reference; a missing column or
// row part yields undefined so that the caller can apply its own default.
// For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9].
function fromCellReference(s) {
  const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/);
  let c = 0;
  for (let i = 0; i < sc.length; i++) c = c * 26 + (sc.charCodeAt(i) - 64);
  return [c ? c - 1 : undefined, sr ? +sr - 1 : undefined];
}
import {test} from "tap";
import {Workbook} from "../src/xlsx.js";

// Builds a stand-in for a parsed workbook from {sheetName: rows}, where rows
// is an array of arrays of raw cell values. Any `overrides` are copied onto
// every worksheet returned by getWorksheet.
function mockWorkbook(contents, overrides = {}) {
  const worksheets = Object.keys(contents).map((name) => ({name}));
  function getWorksheet(name) {
    const source = contents[name];
    const worksheet = {
      _rows: source.map((cells) => ({
        _cells: cells.map((value) => ({value})),
        hasValues: !!cells.length,
      })),
      rowCount: source.length,
      // -Infinity for a sheet without rows, like Math.max() with no arguments.
      columnCount: source.reduce(
        (widest, cells) => Math.max(widest, cells.length),
        -Infinity
      ),
    };
    return {...worksheet, ...overrides};
  }
  return {worksheets, getWorksheet};
}

// Shorthand: a Workbook wrapping a mock built from the given contents.
function workbookOf(contents) {
  return new Workbook(mockWorkbook(contents));
}

test("FileAttachment.xlsx reads sheet names", (t) => {
  const wb = workbookOf({Sheet1: []});
  t.same(wb.sheetNames, ["Sheet1"]);
  t.end();
});

test("FileAttachment.xlsx sheet(name) throws on unknown sheet name", (t) => {
  const wb = workbookOf({Sheet1: []});
  t.throws(() => wb.sheet("bad"));
  t.end();
});

test("FileAttachment.xlsx reads sheets", (t) => {
  const wb = workbookOf({
    Sheet1: [
      ["one", "two", "three"],
      [1, 2, 3],
    ],
  });
  // The same sheet is addressed first by index, then by name.
  t.same(wb.sheet(0), [
    {A: "one", B: "two", C: "three"},
    {A: 1, B: 2, C: 3},
  ]);
  t.same(wb.sheet("Sheet1"), [
    {A: "one", B: "two", C: "three"},
    {A: 1, B: 2, C: 3},
  ]);
  t.end();
});

test("FileAttachment.xlsx reads sheets with different types", (t) => {
  const primitives = workbookOf({
    Sheet1: [
      [],
      [null, undefined],
      ["hello", "", "0", "1"],
      [1, 1.2],
      [true, false],
      [new Date(Date.UTC(2020, 0, 1)), {}],
    ],
  });
  t.same(
    primitives.sheet(0),
    [
      {},
      {},
      {A: "hello", B: "", C: "0", D: "1"},
      {A: 1, B: 1.2},
      {A: true, B: false},
      {A: new Date(Date.UTC(2020, 0, 1)), B: {}},
    ],
    "nullish, strings, numbers, booleans, dates, objects"
  );

  const texts = workbookOf({
    Sheet1: [
      [
        // A: rich text
        {richText: [{text: "two"}, {text: "three"}]},
        // B: plain text
        {text: "plain text"},
        // C: hyperlink whose text equals its target
        {text: "https://example.com", hyperlink: "https://example.com"},
        // D: hyperlink whose rich text equals its target
        {
          text: {richText: [{text: "https://example.com"}]},
          hyperlink: "https://example.com",
        },
        // E: hyperlink with special characters
        {text: `link&"'?`, hyperlink: 'https://example.com?q="'},
        // F: hyperlink whose rich text differs from its target
        {
          text: {richText: [{text: "first"}, {text: "second"}]},
          hyperlink: "https://example.com",
        },
      ],
    ],
  });
  t.same(
    texts.sheet(0),
    [
      {
        A: "twothree",
        B: "plain text",
        C: "https://example.com",
        D: "https://example.com",
        E: `https://example.com?q=" link&"'?`,
        F: "https://example.com firstsecond",
      },
    ],
    "rich text, text, hyperlink text"
  );

  const formulas = workbookOf({
    Sheet1: [
      [
        {formula: "=B2*5", result: 10},
        {sharedFormula: "=B2*6", result: 12},
        {sharedFormula: "=Z2*6", result: {error: "#REF!"}},
      ],
    ],
  });
  t.same(formulas.sheet(0), [{A: 10, B: 12, C: NaN}], "formula results, errors");

  t.end();
});

test("FileAttachment.xlsx reads sheets with headers", (t) => {
  const wb = workbookOf({
    Sheet1: [
      [null, "one", "one", "two", "A", "0"],
      [1, null, 3, 4, 5, "zero"],
      [6, 7, 8, 9, 10],
    ],
  });
  t.same(wb.sheet(0, {headers: true}), [
    {A: 1, one_: 3, two: 4, A_: 5, 0: "zero"},
    {A: 6, one: 7, one_: 8, two: 9, A_: 10},
  ]);
  t.same(wb.sheet(0, {headers: true}).columns, [
    "#",
    "A",
    "one",
    "one_",
    "two",
    "A_",
    "0",
  ]);
  t.end();
});

test("FileAttachment.xlsx throws on invalid ranges", (t) => {
  const wb = workbookOf({Sheet1: []});
  const malformed = new Error("Malformed range specifier");
  // Returns a thunk that reads the sheet with the given range specifier.
  const reading = (range) => () => t.same(wb.sheet(0, {range}));

  t.throws(reading(""), malformed);
  t.throws(reading("-:"), malformed);
  t.throws(reading(" :"), malformed);
  t.throws(reading("a1:"), malformed, "lowercase");
  t.throws(reading("1A:"), malformed);

  t.end();
});

test("FileAttachment.xlsx reads sheet ranges", (t) => {
  const wb = workbookOf({
    Sheet1: [
      [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
      [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
      [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
      [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
    ],
  });
  const read = (range) => wb.sheet(0, {range});

  // No range, and the equivalent ":" specifier.
  const entireSheet = [
    {A: 0, B: 1, C: 2, D: 3, E: 4, F: 5, G: 6, H: 7, I: 8, J: 9},
    {A: 10, B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19},
    {A: 20, B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29},
    {A: 30, B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39},
  ];
  t.same(wb.sheet(0), entireSheet);
  t.same(read(":"), entireSheet);
  t.same(wb.sheet(0).columns, ["#", ...Object.keys(entireSheet[0])]);

  // Both corners given.
  t.same(read("B2:C3"), [
    {B: 11, C: 12},
    {B: 21, C: 22},
  ]);

  // Only the bottom-right corner.
  t.same(read(":C3"), [
    {A: 0, B: 1, C: 2},
    {A: 10, B: 11, C: 12},
    {A: 20, B: 21, C: 22},
  ]);

  // Only the top-left corner.
  t.same(read("B2:"), [
    {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19},
    {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29},
    {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39},
  ]);

  // From a column onwards.
  t.same(read("H:"), [
    {H: 7, I: 8, J: 9},
    {H: 17, I: 18, J: 19},
    {H: 27, I: 28, J: 29},
    {H: 37, I: 38, J: 39},
  ]);

  // Up to a column.
  t.same(read(":C"), [
    {A: 0, B: 1, C: 2},
    {A: 10, B: 11, C: 12},
    {A: 20, B: 21, C: 22},
    {A: 30, B: 31, C: 32},
  ]);

  // Up to a column beyond the sheet's last one.
  t.same(read(":Z"), entireSheet);
  t.same(read(":Z").columns, [..."#ABCDEFGHIJKLMNOPQRSTUVWXYZ"]);

  // From a row onwards.
  t.same(read("2:"), entireSheet.slice(1));

  // Up to a row.
  t.same(read(":2"), entireSheet.slice(0, 2));

  t.end();
});

test("FileAttachment.xlsx throws on unknown range specifier", (t) => {
  const wb = workbookOf({Sheet1: []});
  t.throws(() => wb.sheet(0, {range: 0}));
  t.end();
});

test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => {
  // Number of columns named with one, two or three letters.
  const l0 = 26 * 26 * 26 + 26 * 26 + 26;
  // Column names of a single-row sheet `width` cells wide that contain only
  // the letter A.
  const onlyAs = (width) =>
    workbookOf({Sheet1: [new Array(width).fill(1)]})
      .sheet(0, {headers: false})
      .columns.filter((d) => d.match(/^A*$/));

  t.same(onlyAs(l0), ["A", "AA", "AAA"]);
  t.same(onlyAs(l0 + 1), ["A", "AA", "AAA", "AAAA"]);
  t.end();
});