Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use Auspice JSON as a dataset #1455

Merged
merged 20 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
0ec7adc
feat: add ref and annotation data to Auspice tree types
ivan-aksamentov May 13, 2024
1043b98
refactor: add pathogen nextclade extension to auspice tree type
ivan-aksamentov May 16, 2024
4334f32
feat: use Auspice JSON as dataset
ivan-aksamentov May 16, 2024
b843ada
fix: parsing auspice genome annotations
ivan-aksamentov May 16, 2024
ff7e887
fix: off-by-one in landmark range
ivan-aksamentov May 17, 2024
9b952bf
fix: duplicated start and end fields in the annotation of output tree
ivan-aksamentov May 17, 2024
48d163c
feat: accept Auspice JSON genome annotation in read-annotation command
ivan-aksamentov May 17, 2024
1fc4936
refactor: aggregate inputs loading
ivan-aksamentov May 23, 2024
a27ee66
feat(web): add url parameter `dataset-json-url`
ivan-aksamentov May 23, 2024
fb029d5
Merge remote-tracking branch 'origin/master' into feat/ref-and-ann-fr…
ivan-aksamentov May 23, 2024
b1b3f5f
fix(web): prevent crash when an auspice dataset was used in prev session
ivan-aksamentov May 23, 2024
e5ee068
fix(web): prevent crash when auspice json has no `.root_sequence`
ivan-aksamentov May 23, 2024
883a0d6
refactor: lint
ivan-aksamentov May 23, 2024
9f3c1e0
fix(web): specifically accept json
ivan-aksamentov May 24, 2024
fc7b8bd
fix(web): hide "Load examples" button when examples are not in dataset
ivan-aksamentov May 24, 2024
a3c120b
Merge remote-tracking branch 'origin/master' into feat/ref-and-ann-fr…
ivan-aksamentov May 24, 2024
ddd9925
fix: make dataset files optional
ivan-aksamentov May 24, 2024
fe260c6
feat: allow to override dataset components when Auspice dataset
ivan-aksamentov May 24, 2024
82e69a1
fix(web): don't error when ref missing from auspice json but is provi…
ivan-aksamentov May 24, 2024
44fb8a5
feat(web): take title, description and update date from Auspice JSON
ivan-aksamentov May 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 27 additions & 39 deletions packages/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use color_eyre::{Section, SectionExt};
use eyre::{eyre, ContextCompat, Report, WrapErr};
use itertools::Itertools;
use log::{warn, LevelFilter};
use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
use nextclade::analyze::virus_properties::VirusProperties;
use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson};
use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
Expand All @@ -17,7 +17,7 @@ use nextclade::utils::fs::list_files_recursive;
use nextclade::utils::option::OptionMapRefFallible;
use nextclade::utils::string::{format_list, surround_with_quotes, Indent};
use nextclade::{make_error, make_internal_error, o};
use std::collections::{BTreeMap, BTreeSet};
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, Write};
use std::ops::Deref;
Expand All @@ -35,13 +35,16 @@ pub fn nextclade_get_inputs(
if input_dataset.is_file() && has_extension(input_dataset, "zip") {
dataset_zip_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_file() && has_extension(input_dataset, "json") {
dataset_json_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_dir() {
dataset_dir_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else {
make_error!(
"--input-dataset: path is invalid. \
Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'"
Expected a directory path, a zip file path or json file path, but got: '{input_dataset:#?}'"
)
}
} else {
Expand Down Expand Up @@ -283,6 +286,25 @@ pub fn dataset_dir_load(
})
}

/// Loads Nextclade parameters from a dataset given as a single Auspice JSON v2 file.
///
/// The file at `dataset_json` is parsed into an `AuspiceTree`, and the resulting tree is
/// handed to `NextcladeParams::from_auspice`, whose result is returned as is.
///
/// `_run_args` and `_cdses` are accepted so that the signature mirrors the other dataset
/// loaders in this module, but they are not read here.
///
/// # Errors
///
/// Returns an error (with the context "When reading Auspice JSON v2") if the file cannot be
/// read or parsed, and forwards any error produced by `NextcladeParams::from_auspice`.
pub fn dataset_json_load(
  _run_args: &NextcladeRunArgs,
  dataset_json: impl AsRef<Path>,
  _cdses: &Option<Vec<String>>,
) -> Result<NextcladeParams, Report> {
  let tree_path: &Path = dataset_json.as_ref();
  let tree = AuspiceTree::from_path(tree_path).wrap_err("When reading Auspice JSON v2")?;
  NextcladeParams::from_auspice(&tree)
}

pub fn dataset_individual_files_load(
run_args: &NextcladeRunArgs,
cdses: &Option<Vec<String>>,
Expand All @@ -297,41 +319,7 @@ pub fn dataset_individual_files_load(
.and_then(|input_pathogen_json| read_file_to_string(input_pathogen_json).ok())
.map_ref_fallible(VirusProperties::from_str)
.wrap_err("When reading pathogen JSON")?
.unwrap_or_else(|| {
// The only case where we allow pathogen.json to be missing is when there's no dataset and files are provided
// explicitly through args. Let's create a dummy value to avoid making the field optional,
// and avoid adding `Default` trait.
VirusProperties {
schema_version: "".to_owned(),
attributes: BTreeMap::default(),
shortcuts: vec![],
meta: DatasetMeta::default(),
files: DatasetFiles {
reference: "".to_owned(),
pathogen_json: "".to_owned(),
genome_annotation: None,
tree_json: None,
examples: None,
readme: None,
changelog: None,
rest_files: BTreeMap::default(),
other: serde_json::Value::default(),
},
default_cds: None,
cds_order_preference: vec![],
mut_labels: LabelledMutationsConfig::default(),
qc: None,
general_params: None,
alignment_params: None,
tree_builder_params: None,
phenotype_data: None,
aa_motifs: vec![],
versions: vec![],
version: None,
compatibility: None,
other: serde_json::Value::default(),
}
});
.unwrap_or_default();

let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;

Expand Down
3 changes: 1 addition & 2 deletions packages/nextclade-web/src/components/Error/ErrorContent.tsx
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import React, { useCallback, useMemo, useState } from 'react'
import { Button, Col, Row } from 'reactstrap'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import styled from 'styled-components'
import { CopyToClipboard } from 'react-copy-to-clipboard'
import { FaClipboardCheck, FaClipboardList } from 'react-icons/fa'

import { ErrorGeneric } from 'src/components/Error/error-types/ErrorGeneric'
import { ErrorNetworkConnectionFailure } from 'src/components/Error/error-types/ErrorNetworkConnectionFailure'
import { ErrorNetworkRequestFailure } from 'src/components/Error/error-types/ErrorNetworkRequestFailure'
import { NextcladeV2ErrorContent } from 'src/components/Error/error-types/NextcladeV2ErrorContent'
import { ErrorContentExplanation, getErrorReportText } from 'src/components/Error/ErrorContentExplanation'
import { sanitizeError } from 'src/helpers/sanitizeError'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { HttpRequestError } from 'src/io/axiosFetch'
import { ErrorMessageMonospace } from './ErrorStyles'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import React, { useMemo } from 'react'
import { ErrorContainer, ErrorMessage } from 'src/components/Error/ErrorStyles'
import { LinkExternal } from 'src/components/Link/LinkExternal'
import { PROJECT_NAME, RELEASE_OLD_URL } from 'src/constants'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import urljoin from 'url-join'

export interface Props {
Expand Down
56 changes: 53 additions & 3 deletions packages/nextclade-web/src/hooks/useRunAnalysis.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import type { AuspiceJsonV2, CladeNodeAttrDesc } from 'auspice'

import { changeColorBy } from 'auspice/src/actions/colors'
import { concurrent } from 'fasy'
import { useRouter } from 'next/router'
import { useDispatch } from 'react-redux'
import { useRecoilCallback } from 'recoil'
import { ErrorInternal } from 'src/helpers/ErrorInternal'
import { clearAllFiltersAtom } from 'src/state/resultFilters.state'
import { viewedCdsAtom } from 'src/state/seqViewSettings.state'
import { AlgorithmGlobalStatus } from 'src/types'
import { AlgorithmGlobalStatus, AlgorithmInput, Dataset, NextcladeParamsRaw, NextcladeParamsRawDir } from 'src/types'
import { sanitizeError } from 'src/helpers/sanitizeError'
import { auspiceStartClean, treeFilterByNodeType } from 'src/state/auspice/auspice.actions'
import { createAuspiceState } from 'src/state/auspice/createAuspiceState'
import { datasetCurrentAtom, cdsOrderPreferenceAtom } from 'src/state/dataset.state'
import { globalErrorAtom } from 'src/state/error.state'
import {
datasetJsonAtom,
geneMapInputAtom,
qrySeqInputsStorageAtom,
refSeqInputAtom,
Expand All @@ -35,6 +37,7 @@ import {
} from 'src/state/results.state'
import { numThreadsAtom, showNewRunPopupAtom } from 'src/state/settings.state'
import { launchAnalysis, LaunchAnalysisCallbacks, LaunchAnalysisInputs } from 'src/workers/launchAnalysis'
import { axiosFetchRaw } from 'src/io/axiosFetch'

export function useRunAnalysis() {
const router = useRouter()
Expand All @@ -60,6 +63,8 @@ export function useRunAnalysis() {
const qryInputs = getPromise(qrySeqInputsStorageAtom)
const csvColumnConfig = getPromise(csvColumnConfigAtom)

const datasetJsonPromise = getPromise(datasetJsonAtom)

const inputs: LaunchAnalysisInputs = {
refSeq: getPromise(refSeqInputAtom),
geneMap: getPromise(geneMapInputAtom),
Expand Down Expand Up @@ -130,7 +135,22 @@ export function useRunAnalysis() {
.push('/results', '/results')
.then(async () => {
set(analysisStatusGlobalAtom, AlgorithmGlobalStatus.initWorkers)
return launchAnalysis(qryInputs, inputs, callbacks, datasetCurrent, numThreads, csvColumnConfig)

const tree = await datasetJsonPromise

let params: NextcladeParamsRaw
if (tree) {
params = { Auspice: { tree: JSON.stringify(tree) } }
} else {
const dataset = await datasetCurrent
if (!dataset) {
throw new ErrorInternal('Dataset is required but not found')
}
const data = await getParams(inputs, dataset)
params = { Dir: data }
}

return launchAnalysis(qryInputs, params, callbacks, numThreads, csvColumnConfig)
})
.catch((error) => {
set(analysisStatusGlobalAtom, AlgorithmGlobalStatus.failed)
Expand All @@ -140,3 +160,33 @@ export function useRunAnalysis() {
[router, dispatch],
)
}

/**
 * Resolves all dataset components into their raw contents.
 *
 * Each component (genome annotation, reference sequence, tree, pathogen JSON) is paired with
 * the corresponding dataset file URL and passed to `resolveInput`. All components are
 * resolved concurrently and collected into an object keyed by component name.
 *
 * @param paramInputs pending inputs for each component (each entry is awaited before use)
 * @param dataset dataset whose `files` entries provide the URLs for each component
 * @returns object with keys `geneMap`, `refSeq`, `tree` and `virusProperties`
 */
async function getParams(paramInputs: LaunchAnalysisInputs, dataset: Dataset): Promise<NextcladeParamsRawDir> {
  const { files } = dataset

  const sources = [
    { key: 'geneMap', input: paramInputs.geneMap, datasetFileUrl: files.genomeAnnotation },
    { key: 'refSeq', input: paramInputs.refSeq, datasetFileUrl: files.reference },
    { key: 'tree', input: paramInputs.tree, datasetFileUrl: files.treeJson },
    { key: 'virusProperties', input: paramInputs.virusProperties, datasetFileUrl: files.pathogenJson },
  ]

  const resolvedEntries = await concurrent.map(async (source) => {
    const content = await resolveInput(await source.input, source.datasetFileUrl)
    return [source.key, content]
  }, sources)

  // The entries are built dynamically, so the compiler cannot verify the resulting shape
  return Object.fromEntries(resolvedEntries) as unknown as NextcladeParamsRawDir
}

/**
 * Resolves a single dataset component to its content.
 *
 * @param input explicitly provided input, if any; when present its content is used
 * @param datasetFileUrl URL of the corresponding dataset file, used only when `input` is absent
 * @returns the content of `input`, otherwise the fetched dataset file, otherwise `undefined`
 */
async function resolveInput(input: AlgorithmInput | undefined, datasetFileUrl: string | undefined) {
  if (!input) {
    // Nothing was provided explicitly: fetch the dataset file if there is one, else give up
    return datasetFileUrl ? axiosFetchRaw(datasetFileUrl) : undefined
  }

  // Explicitly provided data always takes precedence over the dataset file
  return input.getContent()
}
8 changes: 6 additions & 2 deletions packages/nextclade-web/src/io/fetchDatasets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import {
parseGithubRepoUrl,
} from 'src/io/fetchSingleDatasetFromGithub'

import { Dataset } from 'src/types'
import { type AuspiceTree, Dataset } from 'src/types'
import {
fetchDatasetsIndex,
filterDatasets,
Expand Down Expand Up @@ -128,7 +128,11 @@ export async function initializeDatasets(datasetServerUrl: string, urlQuery: Par
const minimizerIndexVersion = await getCompatibleMinimizerIndexVersion(datasetServerUrl, datasetsIndexJson)

// Check if URL params specify dataset params and try to find the corresponding dataset
const currentDataset = await getDatasetFromUrlParams(urlQuery, datasets)
const currentDataset:
| (Dataset & {
auspiceJson?: AuspiceTree
})
| undefined = await getDatasetFromUrlParams(urlQuery, datasets)

return { datasets, currentDataset, minimizerIndexVersion }
}
Expand Down
35 changes: 29 additions & 6 deletions packages/nextclade-web/src/io/fetchSingleDataset.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
import type { ParsedUrlQuery } from 'querystring'
import { ErrorFatal } from 'src/helpers/ErrorFatal'
import { fetchSingleDatasetAuspice } from 'src/io/fetchSingleDatasetAuspice'
import { fetchSingleDatasetDirectory } from 'src/io/fetchSingleDatasetDirectory'
import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
import { fetchSingleDatasetFromUrl } from 'src/io/fetchSingleDatasetFromUrl'
import { isGithubUrlOrShortcut, parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'

/**
 * Fetches a single dataset specified through URL query parameters.
 *
 * Two mutually exclusive parameters are recognized:
 *  - `dataset-url`: the dataset is fetched with `fetchSingleDatasetDirectory`
 *  - `dataset-json-url`: the dataset is fetched with `fetchSingleDatasetAuspice`
 *
 * If the URL is a GitHub URL or shortcut, it is first converted to a direct URL and the
 * original URL is passed along as `datasetOriginalUrl`.
 *
 * @param urlQuery parsed URL query of the current page
 * @returns result of the selected fetch function, or `undefined` when neither parameter is set
 * @throws ErrorFatal when both parameters are provided at the same time
 */
export async function fetchSingleDataset(urlQuery: ParsedUrlQuery) {
  const datasetUrl = getQueryParamMaybe(urlQuery, 'dataset-url')
  const datasetUrlJson = getQueryParamMaybe(urlQuery, 'dataset-json-url')

  if (datasetUrl && datasetUrlJson) {
    throw new ErrorFatal(
      "URL parameters 'dataset-url' and 'dataset-json-url' are mutually exclusive, but both provided. Please remove one or the other.",
    )
  }

  let finalUrl
  let options
  let fetchFunction

  if (datasetUrl) {
    finalUrl = datasetUrl
    fetchFunction = fetchSingleDatasetDirectory
  } else if (datasetUrlJson) {
    finalUrl = datasetUrlJson
    fetchFunction = fetchSingleDatasetAuspice
  } else {
    return undefined
  }

  if (isGithubUrlOrShortcut(finalUrl)) {
    // Keep the URL as the user typed it, then replace it with the direct URL for fetching
    const { directUrl } = await parseGitHubRepoUrlOrShortcut(finalUrl)
    options = { datasetOriginalUrl: finalUrl }
    finalUrl = directUrl
  }

  return fetchFunction(finalUrl, options)
}
38 changes: 38 additions & 0 deletions packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { isEmpty } from 'lodash'
import { FatalError } from 'next/dist/lib/fatal-error'
import { attrStrMaybe, AuspiceTree, Dataset, DatasetFiles } from 'src/types'
import { removeTrailingSlash } from 'src/io/url'
import { axiosFetch } from 'src/io/axiosFetch'

/**
 * Fetches an Auspice JSON from a URL and wraps it into a single-entry dataset list.
 *
 * The dataset entry is assembled from the `meta.extensions.nextclade.pathogen` section of the
 * Auspice JSON (when present), and the Auspice JSON itself is attached as `auspiceJson`.
 *
 * @param datasetJsonUrl_ URL of the Auspice JSON; a trailing slash is removed before fetching
 * @returns the dataset list together with the current and default dataset and their names
 * @throws FatalError when the Auspice JSON has no (or an empty) `.root_sequence.nuc`
 */
export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) {
  const datasetJsonUrl = removeTrailingSlash(datasetJsonUrl_)

  const auspiceJson = await axiosFetch<AuspiceTree>(datasetJsonUrl)
  const pathogen = auspiceJson.meta?.extensions?.nextclade?.pathogen

  // NOTE(review): `FatalError` comes from Next.js internals ('next/dist/lib/fatal-error'), while
  // fetchSingleDataset.ts throws the project's own `ErrorFatal` - confirm which one is intended.
  if (isEmpty(auspiceJson.root_sequence?.nuc)) {
    throw new FatalError(`Auspice JSON does not contain required field '.root_sequence.nuc': ${datasetJsonUrl_}`)
  }

  const currentDataset: Dataset & { auspiceJson?: AuspiceTree } = {
    // Defaults: these come before the spread, so same-named fields in `pathogen` override them
    path: datasetJsonUrl,
    capabilities: {
      primers: false,
      qc: [],
    },
    ...pathogen,

    // HACK: there is no files if dataset comes from Auspice JSON, neither they are needed. What to do?
    files: {} as unknown as DatasetFiles,

    auspiceJson,
  }

  const datasets = [currentDataset]
  const defaultDataset = currentDataset
  const currentDatasetName = currentDataset.path
  const defaultDatasetName = currentDatasetName
  // Prefer the human-readable `name` attribute; fall back to the dataset path
  const defaultDatasetNameFriendly = attrStrMaybe(currentDataset.attributes, 'name') ?? currentDatasetName

  return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset }
}
Loading