Skip to content

Commit

Permalink
Merge pull request #1474 from nextstrain/feat/warn-ref-mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Jun 5, 2024
2 parents 1aaa360 + 54a4e06 commit f9d426c
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 2 deletions.
32 changes: 31 additions & 1 deletion packages/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
use nextclade::run::nextclade_wasm::{NextcladeParams, NextcladeParamsOptional};
use nextclade::tree::tree::AuspiceTree;
use nextclade::tree::tree::{check_ref_seq_mismatch, AuspiceTree};
use nextclade::utils::fs::list_files_recursive;
use nextclade::utils::option::OptionMapRefFallible;
use nextclade::utils::string::{format_list, surround_with_quotes, Indent};
Expand Down Expand Up @@ -143,6 +143,12 @@ pub fn dataset_zip_load(

verify_dataset_files(&virus_properties, zip.file_names());

if let Some(tree) = &tree {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

Ok(NextcladeParams {
ref_record,
gene_map,
Expand Down Expand Up @@ -283,6 +289,12 @@ pub fn dataset_dir_load(
.collect_vec();
verify_dataset_files(&virus_properties, dataset_dir_files.iter());

if let Some(tree) = &tree {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

Ok(NextcladeParams {
ref_record,
gene_map,
Expand Down Expand Up @@ -325,6 +337,12 @@ pub fn dataset_json_load(
.map_ref_fallible(GeneMap::from_path)
.wrap_err("When parsing genome annotation")?;

if let (Some(tree), Some(ref_record)) = (&tree, &ref_record) {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

NextcladeParamsOptional {
ref_record,
gene_map,
Expand Down Expand Up @@ -370,6 +388,12 @@ pub fn dataset_individual_files_load(
.map_ref_fallible(AuspiceTree::from_path)
.wrap_err("When reading reference tree JSON")?;

if let Some(tree) = &tree {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

Ok(NextcladeParams {
ref_record,
gene_map,
Expand Down Expand Up @@ -439,6 +463,12 @@ pub fn dataset_str_download_and_load(
.map_ref_fallible(AuspiceTree::from_str)
.wrap_err("When reading reference tree from dataset")?;

if let Some(tree) = &tree {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

Ok(NextcladeParams {
ref_record,
gene_map,
Expand Down
14 changes: 13 additions & 1 deletion packages/nextclade/src/run/nextclade_wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::run::nextclade_run_one::nextclade_run_one;
use crate::run::params::{NextcladeInputParams, NextcladeInputParamsOptional};
use crate::translate::translate_genes::Translation;
use crate::translate::translate_genes_ref::translate_genes_ref;
use crate::tree::tree::{AuspiceGraph, AuspiceTree, CladeNodeAttrKeyDesc};
use crate::tree::tree::{check_ref_seq_mismatch, AuspiceGraph, AuspiceTree, CladeNodeAttrKeyDesc};
use crate::tree::tree_builder::graph_attach_new_nodes_in_place;
use crate::tree::tree_preprocess::graph_preprocess_in_place;
use crate::types::outputs::NextcladeOutputs;
Expand Down Expand Up @@ -133,6 +133,12 @@ impl NextcladeParams {
.map_ref_fallible(GeneMap::from_str)
.wrap_err("When parsing genome annotation")?;

if let (Some(tree), Some(ref_record)) = (&tree, &ref_record) {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

NextcladeParamsOptional {
ref_record,
gene_map,
Expand Down Expand Up @@ -160,6 +166,12 @@ impl NextcladeParams {
.transpose()?
.unwrap_or_default();

if let Some(tree) = &tree {
if let Some(tree_ref) = tree.root_sequence() {
check_ref_seq_mismatch(&ref_record.seq, tree_ref)?;
}
}

Ok(Self {
ref_record,
gene_map,
Expand Down
34 changes: 34 additions & 0 deletions packages/nextclade/src/tree/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::graph::traits::{HasDivergence, HasName};
use crate::io::fs::read_file_to_string;
use crate::io::json::json_parse;
use eyre::{eyre, Report, WrapErr};
use log::warn;
use schemars::JsonSchema;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::BTreeMap;
Expand Down Expand Up @@ -668,4 +669,37 @@ impl AuspiceTree {
pub fn map_nodes_mut(&mut self, action: fn((usize, &mut AuspiceTreeNode))) {
Self::map_nodes_mut_rec(0, &mut self.tree, action);
}

/// Returns the nucleotide root sequence stored in this tree, if any.
///
/// Looks up the `"nuc"` entry of the `root_sequence` field. Yields `None`
/// when the field itself is absent or when it has no `"nuc"` entry.
pub fn root_sequence(&self) -> Option<&str> {
  // Bail out early when the tree carries no root sequences at all.
  let sequences = self.root_sequence.as_ref()?;
  sequences.get("nuc").map(String::as_str)
}
}

/// Compares a standalone reference sequence against the reference (root)
/// sequence taken from a tree and logs a warning when they differ.
///
/// The comparison is an exact string equality check. A mismatch is reported
/// through `warn!` only; it is not turned into an error.
///
/// # Errors
///
/// As written, this function always returns `Ok(())`. The `Result` return
/// type lets callers propagate with `?`.
pub fn check_ref_seq_mismatch(
  standalone_ref_seq: impl AsRef<str>,
  tree_ref_seq: impl AsRef<str>,
) -> Result<(), Report> {
  let standalone = standalone_ref_seq.as_ref();
  let from_tree = tree_ref_seq.as_ref();

  // Nothing to report when both sequences are identical.
  if standalone == from_tree {
    return Ok(());
  }

  warn!(
    r#"Nextclade detected that reference sequence provided does not exactly match reference (root) sequence in Auspice JSON.
This could be due to one of the reasons:
- Nextclade dataset author provided reference sequence and reference tree that are incompatible
- The reference tree has been constructed incorrectly
- The reference sequence provided using `--input-ref` CLI argument is not compatible with the reference tree in the dataset
- The reference tree provided using `--input-tree` CLI argument is not compatible with the reference sequence in the dataset
- The reference sequence provided using `&input-ref` parameter in Nextclade Web URL is not compatible with the reference tree in the dataset
- The reference tree provided using `&input-tree` parameter in Nextclade Web URL is not compatible with the reference sequence in the dataset
This warning signals that there is a potential for failures if the mismatch is not intended.
"#
  );

  Ok(())
}

0 comments on commit f9d426c

Please sign in to comment.