diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml
index 10ca6b7f..0e12497a 100644
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -4,23 +4,23 @@ on: [push]
jobs:
build:
-
strategy:
+ fail-fast: false
matrix:
os: [ubuntu-latest]
- node-version: [12.x, 14.x]
+ node-version: [12.x, 14.x, 16.x]
runs-on: ${{ matrix.os }}
steps:
- - uses: actions/checkout@v1
- - name: Use Node.js ${{ matrix.node-version }}
- uses: actions/setup-node@v1
- with:
- node-version: ${{ matrix.node-version }}
- - name: yarn install and test
- run: |
- yarn install --frozen-lockfile
- yarn test
- env:
- CI: true
+ # Steps: check out the repository, install the Node.js version selected by
+ # the matrix, then install dependencies with a frozen lockfile and run the tests.
+ - uses: actions/checkout@v1
+ - name: Use Node.js ${{ matrix.node-version }}
+ uses: actions/setup-node@v1
+ with:
+ node-version: ${{ matrix.node-version }}
+ - name: yarn install and test
+ run: |
+ yarn install --frozen-lockfile
+ yarn test
+ env:
+ CI: true
diff --git a/README.md b/README.md
index add80855..ae8b1eea 100644
--- a/README.md
+++ b/README.md
@@ -379,6 +379,15 @@ Returns a promise to the file loaded as a [SQLite database client](https://obser
const db = await FileAttachment("chinook.db").sqlite();
```
+# *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source")
+
+Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx).
+
+```js
+const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx();
+const sheet = workbook.sheet("Sheet1", {range: "B4:AF234", headers: true});
+```
+
# *attachment*.xml() [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source")
Returns a promise to an [XMLDocument](https://developer.mozilla.org/en-US/docs/Web/API/XMLDocument) containing the contents of the file.
diff --git a/package.json b/package.json
index f7de5be5..e92ee190 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,7 @@
"url": "https://github.com/observablehq/stdlib.git"
},
"scripts": {
- "test": "tap 'test/**/*-test.js'",
+ "test": "tap 'test/**/*-test.js' --reporter classic",
"prepublishOnly": "rollup -c",
"postpublish": "git push && git push --tags"
},
diff --git a/src/dependencies.js b/src/dependencies.js
index ae845fc3..e721f673 100644
--- a/src/dependencies.js
+++ b/src/dependencies.js
@@ -16,3 +16,4 @@ export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite
export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arquero = dependency("arquero", "4.8.4", "dist/arquero.min.js");
export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js");
+// ExcelJS, loaded by the xlsx() method in fileAttachment.js to read .xlsx workbooks.
+export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js");
diff --git a/src/fileAttachment.js b/src/fileAttachment.js
index ac2c4d44..90d0d03e 100644
--- a/src/fileAttachment.js
+++ b/src/fileAttachment.js
@@ -1,7 +1,8 @@
import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv";
import {require as requireDefault} from "d3-require";
-import {arrow, jszip} from "./dependencies.js";
+import {arrow, jszip, exceljs} from "./dependencies.js";
import {SQLiteDatabaseClient} from "./sqlite.js";
+import {Workbook} from "./xlsx.js";
async function remote_fetch(file) {
const response = await fetch(await file.url());
@@ -70,6 +71,10 @@ class AbstractFile {
async html() {
return this.xml("text/html");
}
+ /**
+  * Loads this file as an Excel workbook.
+  *
+  * The ExcelJS library and the file's bytes are requested in parallel; the
+  * bytes are then parsed by ExcelJS and the result is wrapped in a Workbook.
+  *
+  * @returns {Promise<Workbook>} the wrapped workbook
+  */
+ async xlsx() {
+   const [ExcelJS, buffer] = await Promise.all([
+     requireDefault(exceljs.resolve()),
+     this.arrayBuffer(),
+   ]);
+   const parsed = await new ExcelJS.Workbook().xlsx.load(buffer);
+   return new Workbook(parsed);
+ }
}
class FileAttachment extends AbstractFile {
diff --git a/src/xlsx.js b/src/xlsx.js
new file mode 100644
index 00000000..86203268
--- /dev/null
+++ b/src/xlsx.js
@@ -0,0 +1,104 @@
+/**
+ * Read-only wrapper around a parsed workbook.
+ *
+ * Exposes the worksheet names as the enumerable `sheetNames` property and
+ * keeps the underlying workbook in the non-enumerable `_` property.
+ */
+export class Workbook {
+  /**
+   * @param {object} workbook - object with a `worksheets` array of named
+   *   sheets and a `getWorksheet(name)` method
+   */
+  constructor(workbook) {
+    const sheetNames = workbook.worksheets.map(({name}) => name);
+    Object.defineProperty(this, "_", {value: workbook});
+    Object.defineProperty(this, "sheetNames", {
+      value: sheetNames,
+      enumerable: true,
+    });
+  }
+
+  /**
+   * Reads one sheet as an array of row objects.
+   *
+   * @param {number|string} name - zero-based sheet index, or sheet name
+   *   (non-numbers are coerced to a string before lookup)
+   * @param {object} [options] - forwarded to `extract` (`range`, `headers`)
+   * @returns {Array<object>} the extracted rows
+   * @throws {Error} "Sheet not found" when the index or name is unknown
+   */
+  sheet(name, options) {
+    let sname;
+    if (typeof name === "number") {
+      sname = this.sheetNames[name];
+    } else {
+      name += "";
+      sname = this.sheetNames.includes(name) ? name : null;
+    }
+    if (sname == null) throw new Error(`Sheet not found: ${name}`);
+    return extract(this._.getWorksheet(sname), options);
+  }
+}
+
+/**
+ * Converts a worksheet into an array of row objects.
+ *
+ * Each row object maps a column name to the cell's value (cells whose value
+ * is null or undefined are omitted) and carries a non-enumerable "#" property
+ * holding the 1-based row number. The returned array also has a `columns`
+ * property listing "#" followed by the column names in order.
+ *
+ * @param {object} sheet - worksheet exposing `_rows`, `rowCount` and `columnCount`
+ * @param {object} [options]
+ * @param {string} [options.range] - range specifier such as "B2:C3";
+ *   defaults to the whole sheet
+ * @param {boolean} [options.headers=false] - use the first row of the range
+ *   as column names instead of data
+ * @returns {Array<object>} the rows, with a `columns` property
+ * @throws {Error} if the range specifier is malformed
+ */
+function extract(sheet, {range, headers = false} = {}) {
+  let [[c0, r0], [c1, r1]] = parseRange(range, sheet);
+  const headerRow = headers && sheet._rows[r0++];
+  let names = new Set(["#"]);
+  for (let n = c0; n <= c1; n++) {
+    const header = headerRow ? valueOf(headerRow._cells[n]) : null;
+    // Column names are always strings: a numeric or date header would
+    // otherwise slip past the duplicate check yet collide as an object key.
+    let name = (header && header + "") || toColumn(n);
+    while (names.has(name)) name += "_";
+    names.add(name);
+  }
+  // Pad with c0 holes so that names[c + 1] is the name of column c.
+  names = new Array(c0).concat(Array.from(names));
+
+  // A range with no data rows (an empty sheet read with headers, or an
+  // inverted range) yields an empty result rather than a RangeError.
+  const output = new Array(Math.max(0, r1 - r0 + 1));
+  for (let r = r0; r <= r1; r++) {
+    const row = (output[r - r0] = Object.defineProperty({}, "#", {
+      value: r + 1,
+    }));
+    const _row = sheet._rows[r];
+    if (_row && _row.hasValues)
+      for (let c = c0; c <= c1; c++) {
+        const value = valueOf(_row._cells[c]);
+        if (value != null) row[names[c + 1]] = value;
+      }
+  }
+
+  output.columns = names.filter(() => true); // Filter sparse columns
+  return output;
+}
+
+/**
+ * Returns the plain value of a cell.
+ *
+ * - Missing cells yield undefined.
+ * - Primitives, null and Date values are returned unchanged.
+ * - Formula and shared-formula values yield their result, or NaN when the
+ *   result carries an error.
+ * - Rich text is flattened by concatenating the text of its runs.
+ * - Text values (optionally rich) yield their text, prefixed with the
+ *   hyperlink and a space when a hyperlink is present and differs from it.
+ * - Any other object is returned unchanged.
+ *
+ * @param {object} [cell] - cell with a `value` property
+ * @returns {*} the extracted value
+ */
+function valueOf(cell) {
+  if (!cell) return;
+  const {value} = cell;
+  if (!value || typeof value !== "object" || value instanceof Date) {
+    return value;
+  }
+
+  const joinRuns = (runs) => runs.map((run) => run.text).join("");
+
+  if (value.formula || value.sharedFormula) {
+    const {result} = value;
+    return result && result.error ? NaN : result;
+  }
+  if (value.richText) return joinRuns(value.richText);
+  if (!value.text) return value;
+
+  const text = value.text.richText ? joinRuns(value.text.richText) : value.text;
+  const {hyperlink} = value;
+  return hyperlink && hyperlink !== text ? `${hyperlink} ${text}` : text;
+}
+
+/**
+ * Parses a range specifier such as "B2:C3", "B2:", ":C" or ":" into
+ * zero-based [[c0, r0], [c1, r1]] bounds.
+ *
+ * Parts omitted on the left default to 0; parts omitted on the right default
+ * to the last column or row of the sheet.
+ *
+ * @param {*} [specifier=":"] - range specifier, coerced to a string
+ * @param {object} sheet - provides `columnCount` and `rowCount`
+ * @returns {Array<Array<number>>} [[c0, r0], [c1, r1]]
+ * @throws {Error} "Malformed range specifier" if the specifier does not match
+ */
+function parseRange(specifier = ":", {columnCount, rowCount}) {
+  const spec = specifier + "";
+  if (!/^[A-Z]*\d*:[A-Z]*\d*$/.test(spec)) {
+    throw new Error("Malformed range specifier");
+  }
+  const [start, end] = spec.split(":").map(fromCellReference);
+  const orElse = (index, fallback) => (index === undefined ? fallback : index);
+  return [
+    [orElse(start[0], 0), orElse(start[1], 0)],
+    [orElse(end[0], columnCount - 1), orElse(end[1], rowCount - 1)],
+  ];
+}
+
+// Returns the default column name for a zero-based column index.
+// For example: 0 -> "A", 1 -> "B", 25 -> "Z", 26 -> "AA", 27 -> "AB".
+// Letters are produced from least to most significant and prepended.
+function toColumn(c) {
+  const letters = [];
+  let n = c;
+  n++;
+  do {
+    const digit = n % 26 || 26; // a remainder of 0 stands for "Z"
+    letters.unshift(String.fromCharCode(64 + digit));
+    n = Math.floor((n - 1) / 26);
+  } while (n);
+  return letters.join("");
+}
+
+// Returns the zero-based indexes from a cell reference.
+// For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9].
+// A missing column or row part yields undefined in that position.
+function fromCellReference(s) {
+  const [, letters, digits] = s.match(/^([A-Z]*)(\d*)$/);
+  let column = 0;
+  // Read the letters as a base-26 number whose digits run from A=1 to Z=26.
+  for (const letter of letters) {
+    column = column * 26 + (letter.charCodeAt(0) - 64);
+  }
+  const row = digits ? +digits - 1 : undefined;
+  return [column ? column - 1 : undefined, row];
+}
diff --git a/test/xlsx-test.js b/test/xlsx-test.js
new file mode 100644
index 00000000..773661a2
--- /dev/null
+++ b/test/xlsx-test.js
@@ -0,0 +1,278 @@
+import {test} from "tap";
+import {Workbook} from "../src/xlsx.js";
+
+/**
+ * Builds a minimal stand-in for a parsed workbook.
+ *
+ * @param {object} contents - maps each sheet name to an array of rows, each
+ *   row being an array of raw cell values
+ * @param {object} [overrides] - properties copied onto every worksheet
+ *   returned by `getWorksheet`, replacing the computed ones
+ * @returns {object} object with `worksheets` and `getWorksheet(name)`
+ */
+function mockWorkbook(contents, overrides = {}) {
+  // Wraps raw values as cells; a row has values when it has at least one cell.
+  const toRow = (values) => ({
+    _cells: values.map((value) => ({value})),
+    hasValues: values.length > 0,
+  });
+  const worksheets = Object.keys(contents).map((name) => ({name}));
+
+  function getWorksheet(name) {
+    const rows = contents[name];
+    const widths = rows.map((values) => values.length);
+    const worksheet = {
+      _rows: rows.map(toRow),
+      rowCount: rows.length,
+      columnCount: Math.max(...widths),
+    };
+    return Object.assign(worksheet, overrides);
+  }
+
+  return {worksheets, getWorksheet};
+}
+
+// The workbook exposes the names of its worksheets as `sheetNames`.
+test("FileAttachment.xlsx reads sheet names", (t) => {
+  const {sheetNames} = new Workbook(mockWorkbook({Sheet1: []}));
+  t.same(sheetNames, ["Sheet1"]);
+  t.end();
+});
+
+// Asking for a sheet name that is not in the workbook must throw.
+test("FileAttachment.xlsx sheet(name) throws on unknown sheet name", (t) => {
+  const workbook = new Workbook(mockWorkbook({Sheet1: []}));
+  const readMissingSheet = () => workbook.sheet("bad");
+  t.throws(readMissingSheet);
+  t.end();
+});
+
+// A sheet can be addressed by zero-based index or by name; both give the same rows.
+test("FileAttachment.xlsx reads sheets", (t) => {
+  const rows = [
+    ["one", "two", "three"],
+    [1, 2, 3],
+  ];
+  const workbook = new Workbook(mockWorkbook({Sheet1: rows}));
+  for (const key of [0, "Sheet1"]) {
+    t.same(workbook.sheet(key), [
+      {A: "one", B: "two", C: "three"},
+      {A: 1, B: 2, C: 3},
+    ]);
+  }
+  t.end();
+});
+
+// Checks how each kind of cell value is converted when a sheet is read.
+test("FileAttachment.xlsx reads sheets with different types", (t) => {
+  // Reads the only sheet of a workbook whose Sheet1 holds `rows`.
+  const read = (rows) => new Workbook(mockWorkbook({Sheet1: rows})).sheet(0);
+
+  const scalarRows = [
+    [],
+    [null, undefined],
+    ["hello", "", "0", "1"],
+    [1, 1.2],
+    [true, false],
+    [new Date(Date.UTC(2020, 0, 1)), {}],
+  ];
+  t.same(
+    read(scalarRows),
+    [
+      {},
+      {},
+      {A: "hello", B: "", C: "0", D: "1"},
+      {A: 1, B: 1.2},
+      {A: true, B: false},
+      {A: new Date(Date.UTC(2020, 0, 1)), B: {}},
+    ],
+    "nullish, strings, numbers, booleans, dates, objects"
+  );
+
+  const textRow = [
+    {richText: [{text: "two"}, {text: "three"}]}, // A
+    {text: "plain text"}, // B
+    {text: "https://example.com", hyperlink: "https://example.com"}, // C
+    {
+      text: {richText: [{text: "https://example.com"}]}, // D
+      hyperlink: "https://example.com",
+    },
+    {text: `link&"'?`, hyperlink: 'https://example.com?q="'}, // E
+    {
+      text: {richText: [{text: "first"}, {text: "second"}]}, // F
+      hyperlink: "https://example.com",
+    },
+  ];
+  t.same(
+    read([textRow]),
+    [
+      {
+        A: "twothree",
+        B: "plain text",
+        C: "https://example.com",
+        D: "https://example.com",
+        E: `https://example.com?q=" link&"'?`,
+        F: "https://example.com firstsecond",
+      },
+    ],
+    "rich text, text, hyperlink text"
+  );
+
+  const formulaRow = [
+    {formula: "=B2*5", result: 10},
+    {sharedFormula: "=B2*6", result: 12},
+    {sharedFormula: "=Z2*6", result: {error: "#REF!"}},
+  ];
+  t.same(read([formulaRow]), [{A: 10, B: 12, C: NaN}], "formula results, errors");
+
+  t.end();
+});
+
+// With `headers: true` the first row names the columns: blank headers fall
+// back to the column letter and repeated names get a "_" suffix.
+test("FileAttachment.xlsx reads sheets with headers", (t) => {
+  const rows = [
+    [null, "one", "one", "two", "A", "0"],
+    [1, null, 3, 4, 5, "zero"],
+    [6, 7, 8, 9, 10],
+  ];
+  const workbook = new Workbook(mockWorkbook({Sheet1: rows}));
+  const readWithHeaders = () => workbook.sheet(0, {headers: true});
+
+  t.same(readWithHeaders(), [
+    {A: 1, one_: 3, two: 4, A_: 5, 0: "zero"},
+    {A: 6, one: 7, one_: 8, two: 9, A_: 10},
+  ]);
+
+  const expectedColumns = ["#", "A", "one", "one_", "two", "A_", "0"];
+  t.same(readWithHeaders().columns, expectedColumns);
+  t.end();
+});
+
+// Range specifiers that do not match the expected pattern must be rejected
+// with "Malformed range specifier".
+test("FileAttachment.xlsx throws on invalid ranges", (t) => {
+  const workbook = new Workbook(mockWorkbook({Sheet1: []}));
+  const malformed = new Error("Malformed range specifier");
+
+  // Call sheet() directly inside the thrower: wrapping it in a one-argument
+  // t.same() would add a second, misleading failure if sheet() ever returned.
+  t.throws(() => workbook.sheet(0, {range: ""}), malformed);
+  t.throws(() => workbook.sheet(0, {range: "-:"}), malformed);
+  t.throws(() => workbook.sheet(0, {range: " :"}), malformed);
+  t.throws(() => workbook.sheet(0, {range: "a1:"}), malformed, "lowercase");
+  t.throws(() => workbook.sheet(0, {range: "1A:"}), malformed);
+
+  t.end();
+});
+
+// Checks every supported shape of range specifier against a 4x10 grid whose
+// cell values are 10 * row + column.
+test("FileAttachment.xlsx reads sheet ranges", (t) => {
+  const workbook = new Workbook(
+    mockWorkbook({
+      Sheet1: [
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+        [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
+        [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
+      ],
+    })
+  );
+  // Reads the first sheet restricted to `range`.
+  const read = (range) => workbook.sheet(0, {range});
+
+  // No range, and ":", both select the entire sheet.
+  const entireSheet = [
+    {A: 0, B: 1, C: 2, D: 3, E: 4, F: 5, G: 6, H: 7, I: 8, J: 9},
+    {A: 10, B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19},
+    {A: 20, B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29},
+    {A: 30, B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39},
+  ];
+  t.same(workbook.sheet(0), entireSheet);
+  t.same(read(":"), entireSheet);
+  t.same(workbook.sheet(0).columns, ["#", ...Object.keys(entireSheet[0])]);
+
+  // Both corners given.
+  t.same(read("B2:C3"), [
+    {B: 11, C: 12},
+    {B: 21, C: 22},
+  ]);
+
+  // Only the bottom-right corner given.
+  t.same(read(":C3"), [
+    {A: 0, B: 1, C: 2},
+    {A: 10, B: 11, C: 12},
+    {A: 20, B: 21, C: 22},
+  ]);
+
+  // Only the top-left corner given.
+  t.same(read("B2:"), [
+    {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19},
+    {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29},
+    {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39},
+  ]);
+
+  // Only a starting column.
+  t.same(read("H:"), [
+    {H: 7, I: 8, J: 9},
+    {H: 17, I: 18, J: 19},
+    {H: 27, I: 28, J: 29},
+    {H: 37, I: 38, J: 39},
+  ]);
+
+  // Only an ending column.
+  t.same(read(":C"), [
+    {A: 0, B: 1, C: 2},
+    {A: 10, B: 11, C: 12},
+    {A: 20, B: 21, C: 22},
+    {A: 30, B: 31, C: 32},
+  ]);
+
+  // An ending column past the data: same rows, but columns run through Z.
+  t.same(read(":Z"), entireSheet);
+  t.same(read(":Z").columns, "#ABCDEFGHIJKLMNOPQRSTUVWXYZ".split(""));
+
+  // Only a starting row.
+  t.same(read("2:"), entireSheet.slice(1));
+
+  // Only an ending row.
+  t.same(read(":2"), entireSheet.slice(0, 2));
+
+  t.end();
+});
+
+// A non-string range such as the number 0 is not a valid specifier.
+test("FileAttachment.xlsx throws on unknown range specifier", (t) => {
+  const workbook = new Workbook(mockWorkbook({Sheet1: []}));
+  const readNumericRange = () => workbook.sheet(0, {range: 0});
+  t.throws(readNumericRange);
+  t.end();
+});
+
+// Default column names roll over from Z to AA, ZZ to AAA and ZZZ to AAAA.
+test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => {
+  // 26 + 26^2 + 26^3 columns cover every one-, two- and three-letter name.
+  const l0 = 26 * 26 * 26 + 26 * 26 + 26;
+
+  // Column names made only of "A" for a one-row sheet `width` cells wide.
+  const allAColumns = (width) => {
+    const workbook = new Workbook(
+      mockWorkbook({Sheet1: [Array.from({length: width}).fill(1)]})
+    );
+    const {columns} = workbook.sheet(0, {headers: false});
+    return columns.filter((d) => d.match(/^A*$/));
+  };
+
+  t.same(allAColumns(l0), ["A", "AA", "AAA"]);
+  // One more column is the first to need four letters.
+  t.same(allAColumns(l0 + 1), ["A", "AA", "AAA", "AAAA"]);
+  t.end();
+});