Merge pull request #1386 from nextstrain/drag-drop-excel

Drag-and-drop Excel
nextstrain · Sep 7, 2021 · 56afe6d · 56afe6d
2 parents 3ff0f75 + e4c91f4
commit 56afe6d
Show file tree

Hide file tree

Showing 7 changed files with 162 additions and 55 deletions.
diff --git a/docs/advanced-functionality/drag-drop-csv-tsv.md b/docs/advanced-functionality/drag-drop-csv-tsv.md
@@ -1,15 +1,17 @@
-# Adding extra metadata via CSV/TSV
+# Adding extra metadata via CSV/TSV/XLSX
 
 A common use case is to have additional metadata which you would like to add to the current dataset.
 If you created the dataset itself, then you may wish to keep certain data out of the dataset, as it may change frequently or be sensitive information which you don't want to share publicly.
 
-Additional metadata (CSV / TSV file(s)) can be dragged onto an existing dataset in Auspice.
+Additional metadata (CSV / TSV / XLSX file(s)) can be dragged onto an existing dataset in Auspice.
 These extra data are processed within the browser, so no information leaves the client, which can be useful for viewing private metadata.
 
 The general format is compatible with other popular tools such as [MicroReact](https://microreact.org/).
 The first column defines the names of the strains / samples in the tree, while the first row (header row) defines the metadata names.
+You can add as many columns as you want; each will result in a different colouring of the data being made available.
 The separator can be either a tab character or a comma & the file extension should be `.tsv` or `.csv`, respectively.
-You can add as many columns you want, each will result in a different colouring of the data being made available
+Excel files with file extension `.xlsx` are also supported, but the metadata must be in the first sheet of the workbook.
+Older Excel files with the `.xls` extension are not supported.
 
 ## Example:
 
@@ -44,7 +46,7 @@ USVI/42/2016	C	#710000	0	-120
 Most metadata columns will be added as colourings; once the data has been added they should appear as new entries in the "Color By" dropdown (Left-hand sidebar of Auspice).
 This means you can also filter by these traits using the "Filter Data" box.
 
-An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file.
+An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV/XLSX file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file.
 
 You can choose the colours you want to associate with values by adding in a separate column with the same name + `__colour` (see above example), or the suffix `__color` may also be used.
 Currently the values in this column must be hex values such as `#3498db` (blue).

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -129,6 +129,7 @@
     "webpack-dev-middleware": "^3.1.3",
     "webpack-hot-middleware": "^2.24.3",
     "whatwg-fetch": "^0.10.1",
+    "xlsx": "^0.17.1",
     "yaml-front-matter": "^4.0.0"
   },
   "devDependencies": {

diff --git a/src/actions/filesDropped/constants.js b/src/actions/filesDropped/constants.js
@@ -2,17 +2,20 @@
     Defines acceptable file types for the auspice drag & drop functionality.
 */
 
-const csv_file_types = ["text/csv", "application/vnd.ms-excel"];
-
-// Add MacOS & Linux .tsv to accepted file types
-const accepted_file_types = csv_file_types.concat("text/tab-separated-values");
+const acceptedFileTypes = [
+  "text/csv",
+  "text/tab-separated-values",
+  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+];
 
 // Handle Windows .tsv edge case with empty file type
-const is_windows_tsv = (file) => file.type === "" && file.name.endsWith('.tsv');
+const isWindowsTsv = (file) => file.type === "" && file.name.endsWith('.tsv');
+
+// Handle Excel exported .csv files
+const isExcelCsv = (file) => file.type === "application/vnd.ms-excel" && file.name.endsWith('.csv');
 
-const is_csv_or_tsv = (file) => accepted_file_types.includes(file.type) || is_windows_tsv(file);
+const isAcceptedFileType = (file) => acceptedFileTypes.includes(file.type) || isWindowsTsv(file) || isExcelCsv(file);
 
 export {
-  csv_file_types,
-  is_csv_or_tsv
+  isAcceptedFileType
 };
diff --git a/src/actions/filesDropped/index.js b/src/actions/filesDropped/index.js
@@ -1,6 +1,6 @@
 import { warningNotification } from "../notifications";
 import handleMetadata from "./metadata";
-import { is_csv_or_tsv } from "./constants";
+import { isAcceptedFileType } from "./constants";
 
 
 /**
@@ -18,13 +18,13 @@ const handleFilesDropped = (files) => (dispatch, getState) => {
 
   const file = files[0];
 
-  if (is_csv_or_tsv(file)) {
+  if (isAcceptedFileType(file)) {
     return handleMetadata(dispatch, getState, file);
   }
 
   return dispatch(warningNotification({
     message: `Cannot parse ${file.name}`,
-    details: `Currently only CSV & TSV files are allowed, not ${file.type}`
+    details: `Currently only CSV/TSV/XLSX files are allowed, not ${file.type}`
   }));
 };
 

diff --git a/src/actions/filesDropped/metadata.js b/src/actions/filesDropped/metadata.js
@@ -1,43 +1,54 @@
 import { rgb } from "d3-color";
 import { errorNotification, successNotification, warningNotification } from "../notifications";
 import { ADD_EXTRA_METADATA } from "../types";
-import { parseCsvTsv } from "./parseCsvTsv";
+import { parseCsv } from "./parseCsv";
 
 
 const handleMetadata = async (dispatch, getState, file) => {
   const fileName = file.name;
+  const reader = new FileReader();
+  reader.onload = async (event) => {
+    try {
+      const XLSX = (await import("xlsx/xlsx.mini")).default;
+      /* Convert accepted dropped file to CSV string */
+      /* If dropped file is Excel workbook, only reads in the data from the first sheet */
+      const workbook = XLSX.read(event.target.result, { type: 'binary' });
+      const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
+      const sheetAsCsv = XLSX.utils.sheet_to_csv(firstSheet);
 
-  try {
-    /* Parse & interrogate the CSV file */
-    const {errors, data, meta} = await parseCsvTsv(file);
-    if (errors.length) {
-      console.error(errors);
-      throw new Error(errors.map((e) => e.message).join(", "));
+      /* All accepted file formats have been converted to CSV string by xlsx */
+      /* Use papaparse to parse & interrogate the CSV string */
+      const {errors, data, meta} = await parseCsv(sheetAsCsv);
+      if (errors.length) {
+        console.error(errors);
+        throw new Error(errors.map((e) => e.message).join(", "));
+      }
+      const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields);
+      const rows = {};
+      data.forEach((d) => {rows[d[strainKey]]=d;});
+
+      /* For each coloring, extract values defined in each row etc */
+      const newNodeAttrs = {};
+      const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs`
+      const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined;
+      /* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */
+      const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName);
+      if (!ok) return undefined;
+
+      dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs});
+      return dispatch(successNotification({
+        message: `Adding metadata from ${fileName}`,
+        details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}`
+      }));
+    } catch (err) {
+      return dispatch(errorNotification({
+        message: `Parsing of ${fileName} failed`,
+        details: err.message
+      }));
     }
-    const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields);
-    const rows = {};
-    data.forEach((d) => {rows[d[strainKey]]=d;});
-
-    /* For each coloring, extract values defined in each row etc */
-    const newNodeAttrs = {};
-    const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs`
-    const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined;
-    /* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */
-    const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName);
-    if (!ok) return undefined;
-
-    dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs});
-    return dispatch(successNotification({
-      message: `Adding metadata from ${fileName}`,
-      details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}`
-    }));
+  };
 
-  } catch (err) {
-    return dispatch(errorNotification({
-      message: `Parsing of ${fileName} failed`,
-      details: err.message
-    }));
-  }
+  return reader.readAsBinaryString(file);
 };
 
 export default handleMetadata;

diff --git a/src/actions/filesDropped/parseCsvTsv.js → src/actions/filesDropped/parseCsv.js b/src/actions/filesDropped/parseCsvTsv.js → src/actions/filesDropped/parseCsv.js
@@ -1,5 +1,3 @@
-import { csv_file_types, is_csv_or_tsv } from "./constants";
-
 let Papa; /* lazily imported once a file is dropped on */
 
 /**
@@ -8,15 +6,12 @@ let Papa; /* lazyily imported once a file is dropped on */
  * in here and, you guessed it, this causes all sorts of problems.
  * https://github.com/mholt/PapaParse/issues/169 suggests adding encoding: "ISO-8859-1"
  * to the config, which may work
- * @param {DataTransfer} file a DataTransfer object
+ * @param {string} csvString a string of delimited text
  */
-export const parseCsvTsv = async (file) => {
+export const parseCsv = async (csvString) => {
   if (!Papa) Papa = (await import("papaparse")).default;
   return new Promise((resolve, reject) => {
-    if (!(is_csv_or_tsv(file))) {
-      reject(new Error("Cannot parse this filetype"));
-    }
-    Papa.parse(file, {
+    Papa.parse(csvString, {
       header: true,
       complete: (results) => {
         resolve(results);
@@ -26,7 +21,7 @@ export const parseCsvTsv = async (file) => {
       },
       encoding: "UTF-8",
       comments: "#",
-      delimiter: (csv_file_types.includes(file.type)) ? "," : "\t",
+      delimiter: ",",
       skipEmptyLines: true,
       dynamicTyping: false
     });