Skip to content

Commit

Permalink
Improve validation of URLs & add tests
Browse files Browse the repository at this point in the history
This improves our validation of URLs which should improve app stability.
  • Loading branch information
jameshadfield committed Mar 19, 2021
1 parent b11618b commit 0bac3b7
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 29 deletions.
33 changes: 12 additions & 21 deletions src/components/tree/infoPanels/click.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,6 @@ const item = (key, value, href) => (
</tr>
);

const formatURL = (url) => {
if (url !== undefined && url.startsWith("https_")) {
return url.replace("https_", "https:");
} else if (url !== undefined && url.startsWith("http_")) {
return url.replace("http_", "http:");
}
return url;
};

const Link = ({url, title, value}) => (
<tr>
<th style={infoPanelStyles.item}>{title}</th>
Expand Down Expand Up @@ -96,12 +87,10 @@ const MutationTable = ({mutations}) => {


const AccessionAndUrl = ({node}) => {
const {accession, url} = getAccessionFromNode(node);
const genbank_accession = getTraitFromNode(node, "genbank_accession");

/* `gisaid_epi_isl` is a special value attached to nodes introduced during the 2019 nCoV outbreak.
If set, the display is different from the normal behavior */
/* If `gisaid_epi_isl` or `genbank_accession` exist as node attrs, these preempt normal use of `accession` and `url`.
These special values were introduced during the 2019 nCoV outbreak. */
const gisaid_epi_isl = getTraitFromNode(node, "gisaid_epi_isl");
const genbank_accession = getTraitFromNode(node, "genbank_accession");
if (isValueValid(gisaid_epi_isl)) {
return (
<>
Expand All @@ -113,22 +102,24 @@ const AccessionAndUrl = ({node}) => {
</>
);
}

if (isValueValid(genbank_accession)) {
return (
<Link title={"Genbank accession"} value={genbank_accession} url={"https://www.ncbi.nlm.nih.gov/nuccore/" + genbank_accession}/>
);
} else if (isValueValid(accession) && isValueValid(url)) {
}

const {accession, url} = getAccessionFromNode(node);
if (accession && url) {
return (
<Link url={formatURL(url)} value={accession} title={"Accession"}/>
<Link url={url} value={accession} title={"Accession"}/>
);
} else if (isValueValid(accession)) {
} else if (accession) {
return (
item("Accession", accession)
);
} else if (isValueValid(url)) {
} else if (url) {
return (
<Link title={"Strain URL"} url={formatURL(url)} value={"click here"}/>
<Link title={"Strain URL"} url={url} value={"click here"}/>
);
}
return null;
Expand Down Expand Up @@ -236,7 +227,7 @@ const Trait = ({node, trait, colorings}) => {
trait;
const url = getUrlFromNode(node, trait);
if (url) {
return <Link title={name} url={formatURL(url)} value={value}/>;
return <Link title={name} url={url} value={value}/>;
}
return item(name, value);
};
Expand Down
31 changes: 24 additions & 7 deletions src/util/treeMiscHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,38 @@ export const getAccessionFromNode = (node) => {
if (isValueValid(node.node_attrs.accession)) {
accession = node.node_attrs.accession;
}
if (typeof node.node_attrs.url === "string") {
url = node.node_attrs.url;
}
url = validateUrl(node.node_attrs.url);
}
return {accession, url};
};

/* see comment at top of this file */
export const getUrlFromNode = (node, trait) => {
if (node.node_attrs && node.node_attrs[trait] && typeof node.node_attrs[trait].url === "string") {
return node.node_attrs[trait].url;
}
return undefined;
if (!node.node_attrs || !node.node_attrs[trait]) return undefined;
return validateUrl(node.node_attrs[trait].url);
};

/**
* Check if a URL seems valid & return it.
* For historical reasons, we allow URLs to be defined as `http[s]_` and coerce these into `http[s]:`
* URls are interpreted by `new URL()` and thus may be returned with a trailing slash
* @param {String} url URL string to validate
* @returns {String|undefined} potentially modified URL string or `undefined` (if it doesn't seem valid)
*/
function validateUrl(url) {
if (url===undefined) return undefined; // urls are optional, so return early to avoid the console warning
try {
if (typeof url !== "string") throw new Error();
if (url.startsWith("http_")) url = url.replace("http_", "http:"); // eslint-disable-line no-param-reassign
if (url.startsWith("https_")) url = url.replace("https_", "https:"); // eslint-disable-line no-param-reassign
const urlObj = new URL(url);
return urlObj.href;
} catch (err) {
console.warn(`Dataset provided the invalid URL ${url}`);
return undefined;
}
}

/**
* Traverses the tree and returns a set of genotype states such as
* {"nuc:123A", "S:418K"}.
Expand Down
36 changes: 35 additions & 1 deletion test/treeHelpers.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { collectMutations } from "../src/util/treeMiscHelpers";
import { collectMutations, getUrlFromNode, getAccessionFromNode } from "../src/util/treeMiscHelpers";
import { treeJsonToState } from "../src/util/treeJsonProcessing";

/**
Expand Down Expand Up @@ -53,3 +53,37 @@ function getNodeByName(tree, name) {
recurse(tree[0]);
return namedNode;
}

describe('Extract various values from node_attrs', () => {
beforeEach(() => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
});

// the following test also covers `validateUrl`
test("getUrlFromNode correctly handles various URLs", () => {
expect(getUrlFromNode({}, "trait")).toEqual(undefined); // no node_attrs on node
expect(getUrlFromNode({node_attrs: {}}, "trait")).toEqual(undefined); // no "trait" defined in node_attrs
expect(getUrlFromNode({node_attrs: {trait: "str_value"}}, "trait")).toEqual(undefined); // incorrectly formatted "trait"
expect(getUrlFromNode({node_attrs: {trait: {}}}, "trait")).toEqual(undefined); // correctly formatted "trait", no URL
expect(getUrlFromNode({node_attrs: {trait: {url: 1234}}}, "trait")).toEqual(undefined); // invalid URL
expect(getUrlFromNode({node_attrs: {trait: {url: "bad url"}}}, "trait")).toEqual(undefined); // invalid URL
expect(getUrlFromNode({node_attrs: {trait: {url: "https://nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/");
expect(getUrlFromNode({node_attrs: {trait: {url: "http://nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/");
expect(getUrlFromNode({node_attrs: {trait: {url: "https_//nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/"); // see code for details
expect(getUrlFromNode({node_attrs: {trait: {url: "http_//nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/"); // see code for details
});

test("extract accession & corresponding URL from a node", () => {
expect(getAccessionFromNode({}))
.toStrictEqual({accession: undefined, url: undefined}); // no node_attrs on node
expect(getAccessionFromNode({node_attrs: {}}))
.toStrictEqual({accession: undefined, url: undefined}); // no "accession" on node_attrs
expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}}))
.toStrictEqual({accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"});
expect(getAccessionFromNode({node_attrs: {url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}}))
.toStrictEqual({accession: undefined, url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}); // url can be defined without accession
expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "nuccore/MK049251"}}))
.toStrictEqual({accession: "MK049251", url: undefined}); // invalid URL
});

});

0 comments on commit 0bac3b7

Please sign in to comment.