Skip to content

Commit

Permalink
Merge pull request #1386 from nextstrain/drag-drop-excel
Browse files Browse the repository at this point in the history
Drag-and-drop Excel
  • Loading branch information
jameshadfield authored Sep 7, 2021
2 parents 3ff0f75 + e4c91f4 commit 56afe6d
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 55 deletions.
10 changes: 6 additions & 4 deletions docs/advanced-functionality/drag-drop-csv-tsv.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# Adding extra metadata via CSV/TSV
# Adding extra metadata via CSV/TSV/XLSX

A common use case is to have additional metadata which you would like to add to the current dataset.
If you created the dataset itself, then you may wish to keep certain data out of the dataset, as it may change frequently or be sensitive information which you don't want to share publicly.

Additional metadata (CSV / TSV file(s)) can be dragged onto an existing dataset in Auspice.
Additional metadata (CSV / TSV / XLSX file(s)) can be dragged onto an existing dataset in Auspice.
These extra data are processed within the browser, so no information leaves the client, which can be useful for viewing private metadata.

The general format is compatible with other popular tools such as [MicroReact](https://microreact.org/).
The first column defines the names of the strains / samples in the tree, while the first row (header row) defines the metadata names.
You can add as many columns you want, each will result in a different colouring of the data being made available.
The separator can be either a tab character or a comma & the file extension should be `.tsv` or `.csv`, respectively.
You can add as many columns as you want; each will result in a different colouring of the data being made available.
Excel files with file extension `.xlsx` are also supported, but the metadata must be in the first sheet of the workbook.
Older Excel files with the `.xls` extension are not supported.

## Example:

Expand Down Expand Up @@ -44,7 +46,7 @@ USVI/42/2016 C #710000 0 -120
Most metadata columns will be added as colourings; once the data has been added they should appear as new entries in the "Color By" dropdown (Left-hand sidebar of Auspice).
This means you can also filter by these traits using the "Filter Data" box.

An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file.
An extra colouring is automatically created to represent the set of samples which were in the CSV/TSV/XLSX file -- this allows you to easily filter the dataset to just those samples which you had in your metadata file.

You can choose the colours you want to associate with values by adding in a separate column with the same name + `__colour` (see above example), or the suffix `__color` may also be used.
Currently the values in this column must be hex values such as `#3498db` (blue).
Expand Down
97 changes: 96 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@
"webpack-dev-middleware": "^3.1.3",
"webpack-hot-middleware": "^2.24.3",
"whatwg-fetch": "^0.10.1",
"xlsx": "^0.17.1",
"yaml-front-matter": "^4.0.0"
},
"devDependencies": {
Expand Down
19 changes: 11 additions & 8 deletions src/actions/filesDropped/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
Defines acceptable file types for the auspice drag & drop functionality.
*/

const csv_file_types = ["text/csv", "application/vnd.ms-excel"];

// Add MacOS & Linux .tsv to accepted file types
const accepted_file_types = csv_file_types.concat("text/tab-separated-values");
// MIME types (as reported in `file.type`) that the drag & drop handler accepts outright.
const csvMimeType = "text/csv";
const tsvMimeType = "text/tab-separated-values";
const xlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

const acceptedFileTypes = [csvMimeType, tsvMimeType, xlsxMimeType];

// Handle Windows .tsv edge case with empty file type
const is_windows_tsv = (file) => file.type === "" && file.name.endsWith('.tsv');
/**
 * Detects a `.tsv` file whose reported MIME type is the empty string
 * (the Windows edge case this module special-cases).
 * The extension comparison is case-insensitive so that names such as
 * `METADATA.TSV` are recognised as well.
 * @param {{type: string, name: string}} file dropped file; only `type` and `name` are read
 * @returns {boolean} true when `file.type` is empty and the name ends in `.tsv`
 */
const isWindowsTsv = (file) => file.type === "" && file.name.toLowerCase().endsWith('.tsv');

// Handle Excel exported .csv files
/**
 * Detects a `.csv` file that is reported with the legacy Excel MIME type
 * `application/vnd.ms-excel` rather than `text/csv`.
 * The file name is checked so that other files carrying that MIME type
 * (presumably old-style `.xls` workbooks -- confirm) are not accepted.
 * The extension comparison is case-insensitive so `DATA.CSV` also matches.
 * @param {{type: string, name: string}} file dropped file; only `type` and `name` are read
 * @returns {boolean} true when the type is `application/vnd.ms-excel` and the name ends in `.csv`
 */
const isExcelCsv = (file) => file.type === "application/vnd.ms-excel" && file.name.toLowerCase().endsWith('.csv');

const is_csv_or_tsv = (file) => accepted_file_types.includes(file.type) || is_windows_tsv(file);
/**
 * Decides whether a dropped file may be handed to the metadata parser.
 * A file qualifies when its MIME type is listed in `acceptedFileTypes`, or
 * when it matches one of the special cases checked by `isWindowsTsv` /
 * `isExcelCsv`.
 * @param {{type: string, name: string}} file dropped file
 * @returns {boolean} true when the file is of an accepted type
 */
const isAcceptedFileType = (file) => {
  if (acceptedFileTypes.includes(file.type)) {
    return true;
  }
  return isWindowsTsv(file) || isExcelCsv(file);
};

export {
csv_file_types,
is_csv_or_tsv
isAcceptedFileType
};
6 changes: 3 additions & 3 deletions src/actions/filesDropped/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { warningNotification } from "../notifications";
import handleMetadata from "./metadata";
import { is_csv_or_tsv } from "./constants";
import { isAcceptedFileType } from "./constants";


/**
Expand All @@ -18,13 +18,13 @@ const handleFilesDropped = (files) => (dispatch, getState) => {

const file = files[0];

if (is_csv_or_tsv(file)) {
if (isAcceptedFileType(file)) {
return handleMetadata(dispatch, getState, file);
}

return dispatch(warningNotification({
message: `Cannot parse ${file.name}`,
details: `Currently only CSV & TSV files are allowed, not ${file.type}`
details: `Currently only CSV/TSV/XLSX files are allowed, not ${file.type}`
}));
};

Expand Down
71 changes: 41 additions & 30 deletions src/actions/filesDropped/metadata.js
Original file line number Diff line number Diff line change
@@ -1,43 +1,54 @@
import { rgb } from "d3-color";
import { errorNotification, successNotification, warningNotification } from "../notifications";
import { ADD_EXTRA_METADATA } from "../types";
import { parseCsvTsv } from "./parseCsvTsv";
import { parseCsv } from "./parseCsv";


const handleMetadata = async (dispatch, getState, file) => {
const fileName = file.name;
const reader = new FileReader();
reader.onload = async (event) => {
try {
const XLSX = (await import("xlsx/xlsx.mini")).default;
/* Convert accepted dropped file to CSV string */
/* If dropped file is Excel workbook, only reads in the data from the first sheet */
const workbook = XLSX.read(event.target.result, { type: 'binary' });
const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
const sheetAsCsv = XLSX.utils.sheet_to_csv(firstSheet);

try {
/* Parse & interrogate the CSV file */
const {errors, data, meta} = await parseCsvTsv(file);
if (errors.length) {
console.error(errors);
throw new Error(errors.map((e) => e.message).join(", "));
/* All accepted file formats have been converted to CSV string by xlsx */
/* Use papaparse to parse & interrogate the CSV string */
const {errors, data, meta} = await parseCsv(sheetAsCsv);
if (errors.length) {
console.error(errors);
throw new Error(errors.map((e) => e.message).join(", "));
}
const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields);
const rows = {};
data.forEach((d) => {rows[d[strainKey]]=d;});

/* For each coloring, extract values defined in each row etc */
const newNodeAttrs = {};
const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs`
const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined;
/* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */
const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName);
if (!ok) return undefined;

dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs});
return dispatch(successNotification({
message: `Adding metadata from ${fileName}`,
details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}`
}));
} catch (err) {
return dispatch(errorNotification({
message: `Parsing of ${fileName} failed`,
details: err.message
}));
}
const {coloringInfo, strainKey, latLongKeys, ignoredFields} = processHeader(meta.fields);
const rows = {};
data.forEach((d) => {rows[d[strainKey]]=d;});

/* For each coloring, extract values defined in each row etc */
const newNodeAttrs = {};
const newColorings = processColorings(newNodeAttrs, coloringInfo, rows, fileName); // modifies `newNodeAttrs`
const newGeoResolution = latLongKeys ? processLatLongs(newNodeAttrs, latLongKeys, rows, fileName) : undefined;
/* Fix errors in data & dispatch warnings here, as we cannot dispatch in the reducers */
const ok = checkDataForErrors(dispatch, getState, newNodeAttrs, newColorings, ignoredFields, fileName);
if (!ok) return undefined;

dispatch({type: ADD_EXTRA_METADATA, newColorings, newGeoResolution, newNodeAttrs});
return dispatch(successNotification({
message: `Adding metadata from ${fileName}`,
details: `${Object.keys(newColorings).length} new coloring${Object.keys(newColorings).length > 1 ? "s" : ""} for ${Object.keys(newNodeAttrs).length} node${Object.keys(newNodeAttrs).length > 1 ? "s" : ""}`
}));
};

} catch (err) {
return dispatch(errorNotification({
message: `Parsing of ${fileName} failed`,
details: err.message
}));
}
return reader.readAsBinaryString(file);
};

export default handleMetadata;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { csv_file_types, is_csv_or_tsv } from "./constants";

let Papa; /* lazily imported once a file is dropped on */

/**
Expand All @@ -8,15 +6,12 @@ let Papa; /* lazyily imported once a file is dropped on */
* in here and, you guessed it, this causes all sorts of problems.
* https://github.com/mholt/PapaParse/issues/169 suggests adding encoding: "ISO-8859-1"
* to the config, which may work
* @param {DataTransfer} file a DataTransfer object
* @param {string} csvString a string of delimited text
*/
export const parseCsvTsv = async (file) => {
export const parseCsv = async (csvString) => {
if (!Papa) Papa = (await import("papaparse")).default;
return new Promise((resolve, reject) => {
if (!(is_csv_or_tsv(file))) {
reject(new Error("Cannot parse this filetype"));
}
Papa.parse(file, {
Papa.parse(csvString, {
header: true,
complete: (results) => {
resolve(results);
Expand All @@ -26,7 +21,7 @@ export const parseCsvTsv = async (file) => {
},
encoding: "UTF-8",
comments: "#",
delimiter: (csv_file_types.includes(file.type)) ? "," : "\t",
delimiter: ",",
skipEmptyLines: true,
dynamicTyping: false
});
Expand Down

0 comments on commit 56afe6d

Please sign in to comment.