From 0bac3b700a617c2ed2797cd1ac89e0e2a3781f51 Mon Sep 17 00:00:00 2001 From: James Hadfield Date: Fri, 19 Mar 2021 14:00:17 +1300 Subject: [PATCH] Improve validation of URLs & add tests This improves our validation of URLs which should improve app stability. --- src/components/tree/infoPanels/click.js | 33 +++++++++-------------- src/util/treeMiscHelpers.js | 31 ++++++++++++++++----- test/treeHelpers.test.js | 36 ++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 29 deletions(-) diff --git a/src/components/tree/infoPanels/click.js b/src/components/tree/infoPanels/click.js index 913b9c318..f9c8e88e6 100644 --- a/src/components/tree/infoPanels/click.js +++ b/src/components/tree/infoPanels/click.js @@ -42,15 +42,6 @@ const item = (key, value, href) => ( ); -const formatURL = (url) => { - if (url !== undefined && url.startsWith("https_")) { - return url.replace("https_", "https:"); - } else if (url !== undefined && url.startsWith("http_")) { - return url.replace("http_", "http:"); - } - return url; -}; - const Link = ({url, title, value}) => ( {title} @@ -96,12 +87,10 @@ const MutationTable = ({mutations}) => { const AccessionAndUrl = ({node}) => { - const {accession, url} = getAccessionFromNode(node); - const genbank_accession = getTraitFromNode(node, "genbank_accession"); - - /* `gisaid_epi_isl` is a special value attached to nodes introduced during the 2019 nCoV outbreak. - If set, the display is different from the normal behavior */ + /* If `gisaid_epi_isl` or `genbank_accession` exist as node attrs, these preempt normal use of `accession` and `url`. + These special values were introduced during the 2019 nCoV outbreak. */ const gisaid_epi_isl = getTraitFromNode(node, "gisaid_epi_isl"); + const genbank_accession = getTraitFromNode(node, "genbank_accession"); if (isValueValid(gisaid_epi_isl)) { return ( <> @@ -113,22 +102,24 @@ const AccessionAndUrl = ({node}) => { ); } - if (isValueValid(genbank_accession)) { return ( ); - } else if (isValueValid(accession) && isValueValid(url)) { + } + + const {accession, url} = getAccessionFromNode(node); + if (accession && url) { return ( - + ); - } else if (isValueValid(accession)) { + } else if (accession) { return ( item("Accession", accession) ); - } else if (isValueValid(url)) { + } else if (url) { return ( - + ); } return null; @@ -236,7 +227,7 @@ const Trait = ({node, trait, colorings}) => { trait; const url = getUrlFromNode(node, trait); if (url) { - return ; + return ; } return item(name, value); }; diff --git a/src/util/treeMiscHelpers.js b/src/util/treeMiscHelpers.js index b057084cd..82f4af346 100644 --- a/src/util/treeMiscHelpers.js +++ b/src/util/treeMiscHelpers.js @@ -74,21 +74,38 @@ export const getAccessionFromNode = (node) => { if (isValueValid(node.node_attrs.accession)) { accession = node.node_attrs.accession; } - if (typeof node.node_attrs.url === "string") { - url = node.node_attrs.url; - } + url = validateUrl(node.node_attrs.url); } return {accession, url}; }; /* see comment at top of this file */ export const getUrlFromNode = (node, trait) => { - if (node.node_attrs && node.node_attrs[trait] && typeof node.node_attrs[trait].url === "string") { - return node.node_attrs[trait].url; - } - return undefined; + if (!node.node_attrs || !node.node_attrs[trait]) return undefined; + return validateUrl(node.node_attrs[trait].url); }; +/** + * Check if a URL seems valid & return it. + * For historical reasons, we allow URLs to be defined as `http[s]_` and coerce these into `http[s]:` + * URls are interpreted by `new URL()` and thus may be returned with a trailing slash + * @param {String} url URL string to validate + * @returns {String|undefined} potentially modified URL string or `undefined` (if it doesn't seem valid) + */ +function validateUrl(url) { + if (url===undefined) return undefined; // urls are optional, so return early to avoid the console warning + try { + if (typeof url !== "string") throw new Error(); + if (url.startsWith("http_")) url = url.replace("http_", "http:"); // eslint-disable-line no-param-reassign + if (url.startsWith("https_")) url = url.replace("https_", "https:"); // eslint-disable-line no-param-reassign + const urlObj = new URL(url); + return urlObj.href; + } catch (err) { + console.warn(`Dataset provided the invalid URL ${url}`); + return undefined; + } +} + /** * Traverses the tree and returns a set of genotype states such as * {"nuc:123A", "S:418K"}. diff --git a/test/treeHelpers.test.js b/test/treeHelpers.test.js index 236cc0e72..362e820a6 100644 --- a/test/treeHelpers.test.js +++ b/test/treeHelpers.test.js @@ -1,4 +1,4 @@ -import { collectMutations } from "../src/util/treeMiscHelpers"; +import { collectMutations, getUrlFromNode, getAccessionFromNode } from "../src/util/treeMiscHelpers"; import { treeJsonToState } from "../src/util/treeJsonProcessing"; /** @@ -53,3 +53,37 @@ function getNodeByName(tree, name) { recurse(tree[0]); return namedNode; } + +describe('Extract various values from node_attrs', () => { + beforeEach(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + }); + + // the following test also covers `validateUrl` + test("getUrlFromNode correctly handles various URLs", () => { + expect(getUrlFromNode({}, "trait")).toEqual(undefined); // no node_attrs on node + expect(getUrlFromNode({node_attrs: {}}, "trait")).toEqual(undefined); // no "trait" defined in node_attrs + expect(getUrlFromNode({node_attrs: {trait: "str_value"}}, "trait")).toEqual(undefined); // incorrectly formatted "trait" + expect(getUrlFromNode({node_attrs: {trait: {}}}, "trait")).toEqual(undefined); // correctly formatted "trait", no URL + expect(getUrlFromNode({node_attrs: {trait: {url: 1234}}}, "trait")).toEqual(undefined); // invalid URL + expect(getUrlFromNode({node_attrs: {trait: {url: "bad url"}}}, "trait")).toEqual(undefined); // invalid URL + expect(getUrlFromNode({node_attrs: {trait: {url: "https://nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/"); + expect(getUrlFromNode({node_attrs: {trait: {url: "http://nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/"); + expect(getUrlFromNode({node_attrs: {trait: {url: "https_//nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/"); // see code for details + expect(getUrlFromNode({node_attrs: {trait: {url: "http_//nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/"); // see code for details + }); + + test("extract accession & corresponding URL from a node", () => { + expect(getAccessionFromNode({})) + .toStrictEqual({accession: undefined, url: undefined}); // no node_attrs on node + expect(getAccessionFromNode({node_attrs: {}})) + .toStrictEqual({accession: undefined, url: undefined}); // no "accession" on node_attrs + expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}})) + .toStrictEqual({accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}); + expect(getAccessionFromNode({node_attrs: {url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}})) + .toStrictEqual({accession: undefined, url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}); // url can be defined without accession + expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "nuccore/MK049251"}})) + .toStrictEqual({accession: "MK049251", url: undefined}); // invalid URL + }); + +});