From d598c41885da7ae1837ff9c7b74bd1fb109bd258 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 27 Aug 2021 16:16:38 -0700 Subject: [PATCH 1/4] add xlsx npm package --- package-lock.json | 97 ++++++++++++++++++++++++++++++++++++++++++++++- package.json | 1 + 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/package-lock.json b/package-lock.json index 398f84c70..0692afacc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "auspice", - "version": "2.23.0", + "version": "2.29.1", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -3460,6 +3460,15 @@ "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.2.0.tgz", "integrity": "sha512-7evsyfH1cLOCdAzZAd43Cic04yKydNx0cF+7tiA19p1XnLLPU4dpCQOqpjqwokFe//vS0QqfqqjCS2JkiIs0cA==" }, + "adler-32": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.2.0.tgz", + "integrity": "sha1-aj5r8KY5ALoVZSgIyxXGgT0aXyU=", + "requires": { + "exit-on-epipe": "~1.0.1", + "printj": "~1.1.0" + } + }, "agent-base": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz", @@ -4595,6 +4604,16 @@ "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" }, + "cfb": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.0.tgz", + "integrity": "sha512-sXMvHsKCICVR3Naq+J556K+ExBo9n50iKl6LGarlnvuA2035uMlGA/qVrc0wQtow5P1vJEw9UyrKLCbtIKz+TQ==", + "requires": { + "adler-32": "~1.2.0", + "crc-32": "~1.2.0", + "printj": "~1.1.2" + } + }, "chai": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", @@ -4873,6 +4892,11 @@ "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=", "dev": true }, + "codepage": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", + "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==" + }, "collect-v8-coverage": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz", @@ -5215,6 +5239,15 @@ } } }, + "crc-32": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.0.tgz", + "integrity": "sha512-1uBwHxF+Y/4yF5G48fwnKq6QsIXheor3ZLPT80yGBV1oEUwpPojlEhQbWKVw1VwcTQyMGHK1/XMmTjmlsmTTGA==", + "requires": { + "exit-on-epipe": "~1.0.1", + "printj": "~1.1.0" + } + }, "create-ecdh": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.3.tgz", @@ -6488,6 +6521,11 @@ "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", "integrity": "sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=" }, + "exit-on-epipe": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/exit-on-epipe/-/exit-on-epipe-1.0.1.tgz", + "integrity": "sha512-h2z5mrROTxce56S+pnvAV890uu7ls7f1kEvVGJbw1OlFH3/mlJ5bkXu0KRyW94v37zzHPiUd55iLn3DA7TjWpw==" + }, "expand-brackets": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", @@ -6844,6 +6882,11 @@ "pend": "~1.2.0" } }, + "fflate": { + "version": "0.3.11", + "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.3.11.tgz", + "integrity": "sha512-Rr5QlUeGN1mbOHlaqcSYMKVpPbgLy0AWT/W0EHxA6NGI12yO1jpoui2zBBvU2G824ltM6Ut8BFgfHSBGfkmS0A==" + }, "figgy-pudding": { "version": "3.5.1", "resolved": "https://registry.npmjs.org/figgy-pudding/-/figgy-pudding-3.5.1.tgz", @@ -7165,6 +7208,11 @@ "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" }, + "frac": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", + "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==" + }, "fragment-cache": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz", @@ -15121,6 +15169,11 @@ "minimist": "^1.2.0" } }, + "printj": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/printj/-/printj-1.1.2.tgz", + "integrity": "sha512-zA2SmoLaxZyArQTOPj5LXecR+RagfPSU5Kw1qP+jkWeNlrq+eJZyY2oS68SU1Z/7/myXM4lo9716laOFAVStCQ==" + }, "private": { "version": "0.1.8", "resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz", @@ -16579,6 +16632,14 @@ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=" }, + "ssf": { + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", + "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", + "requires": { + "frac": "~1.1.2" + } + }, "sshpk": { "version": "1.16.1", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", @@ -18247,6 +18308,16 @@ "string-width": "^1.0.2 || 2" } }, + "wmf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", + "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==" + }, + "word": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", + "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==" + }, "wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", @@ -18329,6 +18400,30 @@ "async-limiter": "~1.0.0" } }, + "xlsx": { + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.17.1.tgz", + "integrity": "sha512-SrvK+kMEjiVIKYyJSjSIJwzm2cZn8nQWVh708g7O+pTsmgjoa+uYNLEUn7jmwQdMI/ffCHcY5yEvwBXssBwpRA==", + "requires": { + "adler-32": "~1.2.0", + "cfb": "^1.1.4", + "codepage": "~1.15.0", + "commander": "~2.17.1", + "crc-32": "~1.2.0", + "exit-on-epipe": "~1.0.1", + "fflate": "^0.3.8", + "ssf": "~0.11.2", + "wmf": "~1.0.1", + "word": "~0.3.0" + }, + "dependencies": { + "commander": { + "version": "2.17.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.17.1.tgz", + "integrity": "sha512-wPMUt6FnH2yzG95SA6mzjQOEKUU3aLaDEmzs1ti+1E9h+CsrZghRlqEM/EJ4KscsQVG8uNN4uVreUeT8+drlgg==" + } + } + }, "xml-name-validator": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", diff --git a/package.json b/package.json index 2baa84f23..87f773c33 100644 --- a/package.json +++ b/package.json @@ -129,6 +129,7 @@ "webpack-dev-middleware": "^3.1.3", "webpack-hot-middleware": "^2.24.3", "whatwg-fetch": "^0.10.1", + "xlsx": "^0.17.1", "yaml-front-matter": "^4.0.0" }, "devDependencies": { From abd46d555d05a63c63c5357d3799435743688e31 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 27 Aug 2021 18:42:18 -0700 Subject: [PATCH 2/4] drag-and-drop: support metadata in Excel format Uses the xlsx package to convert dropped files to a CSV string that can then be parsed by Papa Parse. Currently only reads the first sheet of Excel files. We continue to use Papa Parse instead of using `xlsx.utils.sheet_to_json` because papaparse has built in options for `comments` and `dynamicTyping` that are not available in xlsx. --- src/actions/filesDropped/constants.js | 19 ++++--- src/actions/filesDropped/index.js | 6 +-- src/actions/filesDropped/metadata.js | 69 ++++++++++++++----------- src/actions/filesDropped/parseCsvTsv.js | 13 ++--- 4 files changed, 58 insertions(+), 49 deletions(-) diff --git a/src/actions/filesDropped/constants.js b/src/actions/filesDropped/constants.js index f3f10fd15..1a0d14fce 100644 --- a/src/actions/filesDropped/constants.js +++ b/src/actions/filesDropped/constants.js @@ -2,17 +2,20 @@ Defines acceptable file types for the auspice drag & drop functionality. */ -const csv_file_types = ["text/csv", "application/vnd.ms-excel"]; - -// Add MacOS & Linux .tsv to accepted file types -const accepted_file_types = csv_file_types.concat("text/tab-separated-values"); +const acceptedFileTypes = [ + "text/csv", + "text/tab-separated-values", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" +]; // Handle Windows .tsv edge case with empty file type -const is_windows_tsv = (file) => file.type === "" && file.name.endsWith('.tsv'); +const isWindowsTsv = (file) => file.type === "" && file.name.endsWith('.tsv'); + +// Handle Excel exported .csv files +const isExcelCsv = (file) => file.type === "application/vnd.ms-excel" && file.name.endsWith('.csv'); -const is_csv_or_tsv = (file) => accepted_file_types.includes(file.type) || is_windows_tsv(file); +const isAcceptedFileType = (file) => acceptedFileTypes.includes(file.type) || isWindowsTsv(file) || isExcelCsv(file); export { - csv_file_types, - is_csv_or_tsv + isAcceptedFileType }; diff --git a/src/actions/filesDropped/index.js b/src/actions/filesDropped/index.js index 489eb8619..020509126 100644 --- a/src/actions/filesDropped/index.js +++ b/src/actions/filesDropped/index.js @@ -1,6 +1,6 @@ import { warningNotification } from "../notifications"; import handleMetadata from "./metadata"; -import { is_csv_or_tsv } from "./constants"; +import { isAcceptedFileType } from "./constants"; /** @@ -18,13 +18,13 @@ const handleFilesDropped = (files) => (dispatch, getState) => { const file = files[0]; - if (is_csv_or_tsv(file)) { + if (isAcceptedFileType(file)) { return handleMetadata(dispatch, getState, file); } return dispatch(warningNotification({ message: `Cannot parse ${file.name}`, - details: `Currently only CSV & TSV files are allowed, not ${file.type}` + details: `Currently only CSV/TSV/XLSX files are allowed, not ${file.type}` })); }; diff --git a/src/actions/filesDropped/metadata.js b/src/actions/filesDropped/metadata.js index a2cfe32ec..de1c875e6 100644 --- a/src/actions/filesDropped/metadata.js +++ b/src/actions/filesDropped/metadata.js @@ -6,38 +6,49 @@ import { parseCsvTsv } from "./parseCsvTsv"; const handleMetadata = async (dispatch, getState, file) => { const fileName = file.name; + const reader = new FileReader(); + reader.onload = async (event) => { + try { + const XLSX = (await import("xlsx/xlsx.mini")).default; + /* Convert accepted dropped file to CSV string */ + /* If dropped file is Excel workbook, only reads in the data from the first sheet */ + const workbook = XLSX.read(event.target.result, { type: 'binary' }); + const firstSheet = workbook.Sheets[workbook.SheetNames[0]]; + const sheetAsCsv = XLSX.utils.sheet_to_csv(firstSheet); - try { - /* Parse & interrogate the CSV file */ - const {errors, data, meta} = await parseCsvTsv(file); - if (errors.length) { - console.error(errors); - throw new Error(errors.map((e) => e.message).join(", ")); + /* All accepted file formats have been converted to CSV string by xlsx */ + /* Use papaparse to parse & interrogate the CSV string */ + const {errors, data, meta} = await parseCsvTsv(sheetAsCsv); + if (errors.length) { + console.error(errors); + throw new Error(errors.map((e) => e.message).join(", ")); + } + const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields); + const rows = {}; + data.forEach((d) => {rows[d[strainKey]]=d;}); + + /* For each coloring, extract values defined in each row etc */ + const newNodeAttrs = {}; + const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs` + const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined; + /* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */ + const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName); + if (!ok) return undefined; + + dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs}); + return dispatch(successNotification({ + message: `Adding metadata from ${fileName}`, + details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}` + })); + } catch (err) { + return dispatch(errorNotification({ + message: `Parsing of ${fileName} failed`, + details: err.message + })); } - const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields); - const rows = {}; - data.forEach((d) => {rows[d[strainKey]]=d;}); - - /* For each coloring, extract values defined in each row etc */ - const newNodeAttrs = {}; - const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs` - const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined; - /* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */ - const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName); - if (!ok) return undefined; - - dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs}); - return dispatch(successNotification({ - message: `Adding metadata from ${fileName}`, - details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}` - })); + }; - } catch (err) { - return dispatch(errorNotification({ - message: `Parsing of ${fileName} failed`, - details: err.message - })); - } + return reader.readAsBinaryString(file); }; export default handleMetadata; diff --git a/src/actions/filesDropped/parseCsvTsv.js b/src/actions/filesDropped/parseCsvTsv.js index 8303b4c3d..c3f099a42 100644 --- a/src/actions/filesDropped/parseCsvTsv.js +++ b/src/actions/filesDropped/parseCsvTsv.js @@ -1,5 +1,3 @@ -import { csv_file_types, is_csv_or_tsv } from "./constants"; - let Papa; /* lazyily imported once a file is dropped on */ /** @@ -8,15 +6,12 @@ let Papa; /* lazyily imported once a file is dropped on */ * in here annd, you guessed it, this causes all sorts of problems. * https://github.com/mholt/PapaParse/issues/169 suggests adding encoding: "ISO-8859-1" * to the config, which may work - * @param {DataTransfer} file a DataTransfer object + * @param {string} csvString a string of delimited text */ -export const parseCsvTsv = async (file) => { +export const parseCsvTsv = async (csvString) => { if (!Papa) Papa = (await import("papaparse")).default; return new Promise((resolve, reject) => { - if (!(is_csv_or_tsv(file))) { - reject(new Error("Cannot parse this filetype")); - } - Papa.parse(file, { + Papa.parse(csvString, { header: true, complete: (results) => { resolve(results); @@ -26,7 +21,7 @@ export const parseCsvTsv = async (file) => { }, encoding: "UTF-8", comments: "#", - delimiter: (csv_file_types.includes(file.type)) ? "," : "\t", + delimiter: ",", skipEmptyLines: true, dynamicTyping: false }); From 78b3671d74d055a9489207adf2f0f51cc2f18153 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 27 Aug 2021 19:05:35 -0700 Subject: [PATCH 3/4] docs: add Excel to drag-and-drop section --- docs/advanced-functionality/drag-drop-csv-tsv.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/advanced-functionality/drag-drop-csv-tsv.md b/docs/advanced-functionality/drag-drop-csv-tsv.md index dd7cb34cf..7198d5be1 100644 --- a/docs/advanced-functionality/drag-drop-csv-tsv.md +++ b/docs/advanced-functionality/drag-drop-csv-tsv.md @@ -1,15 +1,17 @@ -# Adding extra metadata via CSV/TSV +# Adding extra metadata via CSV/TSV/XLSX A common use case is to have additional metadata which you would like to add to the current dataset. If you created the dataset itself, then you may wish to keep certain data out of the dataset, as it may change frequently or be sensitive information which you don't want to share publicly. -Additional metadata (CSV / TSV file(s)) can be dragged onto an existing dataset in Auspice. +Additional metadata (CSV / TSV / XLSX file(s)) can be dragged onto an existing dataset in Auspice. These extra data are processed within the browser, so no information leaves the client, which can be useful for viewing private metadata. The general format is compatible with other popular tools such as [MicroReact](https://microreact.org/). The first column defines the names of the strains / samples in the tree, while the first row (header row) defines the metadata names. +You can add as many columns you want, each will result in a different colouring of the data being made available. The separator can be either a tab character or a comma & the file extension should be `.tsv` or `.csv`, respectively. -You can add as many columns you want, each will result in a different colouring of the data being made available +Excel files with file extension `.xlsx` are also supported, but the metadata must be in the first sheet of the workbook. +Older Excel files with the `.xls` extension are not supported. ## Example: @@ -44,7 +46,7 @@ USVI/42/2016 C #710000 0 -120 Most metadata columns will be added as colourings; once the data has been added they should appear as new entries in the "Color By" dropdown (Left-hand sidebar of Auspice). This means you can also filter by these traits using the "Filter Data" box. -An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file. +An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV/XLSX file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file. You can choose the colours you want to associate with values by adding in a separate column with the same name + `__colour` (see above example), or the suffix `__color` may also be used. Currently the values in this column must be hex values such as `#3498db` (blue). From e4c91f44ab59fa99e5d751648f4881dc3edb8944 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 27 Aug 2021 19:13:38 -0700 Subject: [PATCH 4/4] rename parseCsvTsv to parseCsv --- src/actions/filesDropped/metadata.js | 4 ++-- src/actions/filesDropped/{parseCsvTsv.js => parseCsv.js} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/actions/filesDropped/{parseCsvTsv.js => parseCsv.js} (94%) diff --git a/src/actions/filesDropped/metadata.js b/src/actions/filesDropped/metadata.js index de1c875e6..3f1dd9545 100644 --- a/src/actions/filesDropped/metadata.js +++ b/src/actions/filesDropped/metadata.js @@ -1,7 +1,7 @@ import { rgb } from "d3-color"; import { errorNotification, successNotification, warningNotification } from "../notifications"; import { ADD_EXTRA_METADATA } from "../types"; -import { parseCsvTsv } from "./parseCsvTsv"; +import { parseCsv } from "./parseCsv"; const handleMetadata = async (dispatch, getState, file) => { @@ -18,7 +18,7 @@ const handleMetadata = async (dispatch, getState, file) => { /* All accepted file formats have been converted to CSV string by xlsx */ /* Use papaparse to parse & interrogate the CSV string */ - const {errors, data, meta} = await parseCsvTsv(sheetAsCsv); + const {errors, data, meta} = await parseCsv(sheetAsCsv); if (errors.length) { console.error(errors); throw new Error(errors.map((e) => e.message).join(", ")); diff --git a/src/actions/filesDropped/parseCsvTsv.js b/src/actions/filesDropped/parseCsv.js similarity index 94% rename from src/actions/filesDropped/parseCsvTsv.js rename to src/actions/filesDropped/parseCsv.js index c3f099a42..b440420a7 100644 --- a/src/actions/filesDropped/parseCsvTsv.js +++ b/src/actions/filesDropped/parseCsv.js @@ -8,7 +8,7 @@ let Papa; /* lazyily imported once a file is dropped on */ * to the config, which may work * @param {string} csvString a string of delimited text */ -export const parseCsvTsv = async (csvString) => { +export const parseCsv = async (csvString) => { if (!Papa) Papa = (await import("papaparse")).default; return new Promise((resolve, reject) => { Papa.parse(csvString, {