From d7091fc65933e9c5f7872cc4acdae7c21b2f724b Mon Sep 17 00:00:00 2001 From: James Hadfield Date: Fri, 19 Mar 2021 10:51:05 +1300 Subject: [PATCH 1/3] Treat accessions and urls as special node traits The schema defines these as "special" property, and we use them to render the value and link to be rendered via `` within the tip-clicked panel. These should not be available as valid traits for general display. --- src/components/tree/infoPanels/click.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/tree/infoPanels/click.js b/src/components/tree/infoPanels/click.js index ed4fd02ac..06da7e1e6 100644 --- a/src/components/tree/infoPanels/click.js +++ b/src/components/tree/infoPanels/click.js @@ -218,7 +218,7 @@ const SampleDate = ({node, t}) => { const getTraitsToDisplay = (node) => { // TODO -- this should be centralised somewhere if (!node.node_attrs) return []; - const ignore = ["author", "div", "num_date", "gisaid_epi_isl", "genbank_accession"]; + const ignore = ["author", "div", "num_date", "gisaid_epi_isl", "genbank_accession", "accession", "url"]; return Object.keys(node.node_attrs).filter((k) => !ignore.includes(k)); }; From b11618bcaf873916ba4eef697d800ce099c39483 Mon Sep 17 00:00:00 2001 From: James Hadfield Date: Fri, 19 Mar 2021 13:01:03 +1300 Subject: [PATCH 2/3] Allow node_traits to define their own URLs This extends our interpretation of dataset-supplied traits to allow them to define a URL as well as a value. If a url is specified, then the value (in the tip-clicked panel) is rendered as a link. Closes #1307 --- src/components/tree/infoPanels/click.js | 11 ++++++++--- src/util/treeMiscHelpers.js | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/components/tree/infoPanels/click.js b/src/components/tree/infoPanels/click.js index 06da7e1e6..913b9c318 100644 --- a/src/components/tree/infoPanels/click.js +++ b/src/components/tree/infoPanels/click.js @@ -96,8 +96,7 @@ const MutationTable = ({mutations}) => { const AccessionAndUrl = ({node}) => { - const accession = getAccessionFromNode(node); - const url = getUrlFromNode(node); + const {accession, url} = getAccessionFromNode(node); const genbank_accession = getTraitFromNode(node, "genbank_accession"); /* `gisaid_epi_isl` is a special value attached to nodes introduced during the 2019 nCoV outbreak. @@ -230,10 +229,16 @@ const Trait = ({node, trait, colorings}) => { value = Number.parseFloat(value_tmp).toPrecision(3); } } + if (!isValueValid(value)) return null; + const name = (colorings && colorings[trait] && colorings[trait].title) ? colorings[trait].title : trait; - return isValueValid(value) ? item(name, value) : null; + const url = getUrlFromNode(node, trait); + if (url) { + return ; + } + return item(name, value); }; /** diff --git a/src/util/treeMiscHelpers.js b/src/util/treeMiscHelpers.js index 6f003b4e7..b057084cd 100644 --- a/src/util/treeMiscHelpers.js +++ b/src/util/treeMiscHelpers.js @@ -69,15 +69,25 @@ export const getFullAuthorInfoFromNode = (node) => export const getAccessionFromNode = (node) => { /* see comment at top of this file */ - if (node.node_attrs && node.node_attrs.accession) { - return node.node_attrs.accession; + let accession, url; + if (node.node_attrs) { + if (isValueValid(node.node_attrs.accession)) { + accession = node.node_attrs.accession; + } + if (typeof node.node_attrs.url === "string") { + url = node.node_attrs.url; + } } - return undefined; + return {accession, url}; }; /* see comment at top of this file */ -export const getUrlFromNode = (node) => - (node.node_attrs && node.node_attrs.url) ? node.node_attrs.url : undefined; +export const getUrlFromNode = (node, trait) => { + if (node.node_attrs && node.node_attrs[trait] && typeof node.node_attrs[trait].url === "string") { + return node.node_attrs[trait].url; + } + return undefined; +}; /** * Traverses the tree and returns a set of genotype states such as From 0bac3b700a617c2ed2797cd1ac89e0e2a3781f51 Mon Sep 17 00:00:00 2001 From: James Hadfield Date: Fri, 19 Mar 2021 14:00:17 +1300 Subject: [PATCH 3/3] Improve validation of URLs & add tests This improves our validation of URLs which should improve app stability. --- src/components/tree/infoPanels/click.js | 33 +++++++++-------------- src/util/treeMiscHelpers.js | 31 ++++++++++++++++----- test/treeHelpers.test.js | 36 ++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 29 deletions(-) diff --git a/src/components/tree/infoPanels/click.js b/src/components/tree/infoPanels/click.js index 913b9c318..f9c8e88e6 100644 --- a/src/components/tree/infoPanels/click.js +++ b/src/components/tree/infoPanels/click.js @@ -42,15 +42,6 @@ const item = (key, value, href) => ( ); -const formatURL = (url) => { - if (url !== undefined && url.startsWith("https_")) { - return url.replace("https_", "https:"); - } else if (url !== undefined && url.startsWith("http_")) { - return url.replace("http_", "http:"); - } - return url; -}; - const Link = ({url, title, value}) => ( {title} @@ -96,12 +87,10 @@ const MutationTable = ({mutations}) => { const AccessionAndUrl = ({node}) => { - const {accession, url} = getAccessionFromNode(node); - const genbank_accession = getTraitFromNode(node, "genbank_accession"); - - /* `gisaid_epi_isl` is a special value attached to nodes introduced during the 2019 nCoV outbreak. - If set, the display is different from the normal behavior */ + /* If `gisaid_epi_isl` or `genbank_accession` exist as node attrs, these preempt normal use of `accession` and `url`. + These special values were introduced during the 2019 nCoV outbreak. */ const gisaid_epi_isl = getTraitFromNode(node, "gisaid_epi_isl"); + const genbank_accession = getTraitFromNode(node, "genbank_accession"); if (isValueValid(gisaid_epi_isl)) { return ( <> @@ -113,22 +102,24 @@ const AccessionAndUrl = ({node}) => { ); } - if (isValueValid(genbank_accession)) { return ( ); - } else if (isValueValid(accession) && isValueValid(url)) { + } + + const {accession, url} = getAccessionFromNode(node); + if (accession && url) { return ( - + ); - } else if (isValueValid(accession)) { + } else if (accession) { return ( item("Accession", accession) ); - } else if (isValueValid(url)) { + } else if (url) { return ( - + ); } return null; @@ -236,7 +227,7 @@ const Trait = ({node, trait, colorings}) => { trait; const url = getUrlFromNode(node, trait); if (url) { - return ; + return ; } return item(name, value); }; diff --git a/src/util/treeMiscHelpers.js b/src/util/treeMiscHelpers.js index b057084cd..82f4af346 100644 --- a/src/util/treeMiscHelpers.js +++ b/src/util/treeMiscHelpers.js @@ -74,21 +74,38 @@ export const getAccessionFromNode = (node) => { if (isValueValid(node.node_attrs.accession)) { accession = node.node_attrs.accession; } - if (typeof node.node_attrs.url === "string") { - url = node.node_attrs.url; - } + url = validateUrl(node.node_attrs.url); } return {accession, url}; }; /* see comment at top of this file */ export const getUrlFromNode = (node, trait) => { - if (node.node_attrs && node.node_attrs[trait] && typeof node.node_attrs[trait].url === "string") { - return node.node_attrs[trait].url; - } - return undefined; + if (!node.node_attrs || !node.node_attrs[trait]) return undefined; + return validateUrl(node.node_attrs[trait].url); }; +/** + * Check if a URL seems valid & return it. + * For historical reasons, we allow URLs to be defined as `http[s]_` and coerce these into `http[s]:` + * URls are interpreted by `new URL()` and thus may be returned with a trailing slash + * @param {String} url URL string to validate + * @returns {String|undefined} potentially modified URL string or `undefined` (if it doesn't seem valid) + */ +function validateUrl(url) { + if (url===undefined) return undefined; // urls are optional, so return early to avoid the console warning + try { + if (typeof url !== "string") throw new Error(); + if (url.startsWith("http_")) url = url.replace("http_", "http:"); // eslint-disable-line no-param-reassign + if (url.startsWith("https_")) url = url.replace("https_", "https:"); // eslint-disable-line no-param-reassign + const urlObj = new URL(url); + return urlObj.href; + } catch (err) { + console.warn(`Dataset provided the invalid URL ${url}`); + return undefined; + } +} + /** * Traverses the tree and returns a set of genotype states such as * {"nuc:123A", "S:418K"}. diff --git a/test/treeHelpers.test.js b/test/treeHelpers.test.js index 236cc0e72..362e820a6 100644 --- a/test/treeHelpers.test.js +++ b/test/treeHelpers.test.js @@ -1,4 +1,4 @@ -import { collectMutations } from "../src/util/treeMiscHelpers"; +import { collectMutations, getUrlFromNode, getAccessionFromNode } from "../src/util/treeMiscHelpers"; import { treeJsonToState } from "../src/util/treeJsonProcessing"; /** @@ -53,3 +53,37 @@ function getNodeByName(tree, name) { recurse(tree[0]); return namedNode; } + +describe('Extract various values from node_attrs', () => { + beforeEach(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + }); + + // the following test also covers `validateUrl` + test("getUrlFromNode correctly handles various URLs", () => { + expect(getUrlFromNode({}, "trait")).toEqual(undefined); // no node_attrs on node + expect(getUrlFromNode({node_attrs: {}}, "trait")).toEqual(undefined); // no "trait" defined in node_attrs + expect(getUrlFromNode({node_attrs: {trait: "str_value"}}, "trait")).toEqual(undefined); // incorrectly formatted "trait" + expect(getUrlFromNode({node_attrs: {trait: {}}}, "trait")).toEqual(undefined); // correctly formatted "trait", no URL + expect(getUrlFromNode({node_attrs: {trait: {url: 1234}}}, "trait")).toEqual(undefined); // invalid URL + expect(getUrlFromNode({node_attrs: {trait: {url: "bad url"}}}, "trait")).toEqual(undefined); // invalid URL + expect(getUrlFromNode({node_attrs: {trait: {url: "https://nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/"); + expect(getUrlFromNode({node_attrs: {trait: {url: "http://nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/"); + expect(getUrlFromNode({node_attrs: {trait: {url: "https_//nextstrain.org"}}}, "trait")).toEqual("https://nextstrain.org/"); // see code for details + expect(getUrlFromNode({node_attrs: {trait: {url: "http_//nextstrain.org"}}}, "trait")).toEqual("http://nextstrain.org/"); // see code for details + }); + + test("extract accession & corresponding URL from a node", () => { + expect(getAccessionFromNode({})) + .toStrictEqual({accession: undefined, url: undefined}); // no node_attrs on node + expect(getAccessionFromNode({node_attrs: {}})) + .toStrictEqual({accession: undefined, url: undefined}); // no "accession" on node_attrs + expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}})) + .toStrictEqual({accession: "MK049251", url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}); + expect(getAccessionFromNode({node_attrs: {url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}})) + .toStrictEqual({accession: undefined, url: "https://www.ncbi.nlm.nih.gov/nuccore/MK049251"}); // url can be defined without accession + expect(getAccessionFromNode({node_attrs: {accession: "MK049251", url: "nuccore/MK049251"}})) + .toStrictEqual({accession: "MK049251", url: undefined}); // invalid URL + }); + +});