diff --git a/src/mapper/assembly.rs b/src/mapper/assembly.rs index 142ebc5..fa977b6 100644 --- a/src/mapper/assembly.rs +++ b/src/mapper/assembly.rs @@ -41,6 +41,9 @@ pub struct Config { /// Re-normalize out of bounds genome variants on minus strand. This can be /// switched off so genome sequence does not have to be available in provider. pub renormalize_g: bool, + /// Use the genome sequence in case of uncertain g-to-n projections. This + /// can be switched off so genome sequence does not have to be available. + pub genome_seq_available: bool, } impl Default for Config { @@ -56,6 +59,7 @@ impl Default for Config { strict_bounds: true, add_gene_symbol: false, renormalize_g: true, + genome_seq_available: true, } } } @@ -103,6 +107,7 @@ impl Mapper { add_gene_symbol: config.add_gene_symbol, strict_bounds: config.strict_bounds, renormalize_g: config.renormalize_g, + genome_seq_available: config.genome_seq_available, }; let inner = VariantMapper::new(&inner_config, provider.clone()); let asm_accessions = provider diff --git a/src/mapper/variant.rs b/src/mapper/variant.rs index 886b503..c3de96d 100644 --- a/src/mapper/variant.rs +++ b/src/mapper/variant.rs @@ -30,6 +30,9 @@ pub struct Config { /// Re-normalize out of bounds genome variants on minus strand. This can be /// switched off so genome sequence does not have to be available in provider. pub renormalize_g: bool, + /// Use the genome sequence in case of uncertain g-to-n projections. This + /// can be switched off so genome sequence does not have to be available. + pub genome_seq_available: bool, } impl Default for Config { @@ -41,6 +44,7 @@ impl Default for Config { add_gene_symbol: false, strict_bounds: true, renormalize_g: true, + genome_seq_available: true, } } } @@ -206,7 +210,11 @@ impl Mapper { pos_n.inner(), loc_edit.loc.is_certain() && pos_n.is_certain(), ); - let (pos_n, edit_n) = if let Mu::Certain(pos_n) = pos_n { + // The original Python code falls back to the genome for uncertain positions. 
This + // cannot be done if we do not have the original genome sequence. + let pos_n_certain = pos_n.is_certain(); + let pos_n = pos_n.inner(); + let (pos_n, edit_n) = if pos_n_certain || !self.config.genome_seq_available { let edit_n = self.convert_edit_check_strand(mapper.strand, &loc_edit.edit)?; if let NaEdit::Ins { alternative } = edit_n.inner() { if pos_n.start.offset.is_none() @@ -236,7 +244,9 @@ impl Mapper { (Mu::Certain((*pos_n).clone()), edit_n) } } else { - let pos_g = mapper.n_to_g(pos_n.inner())?; + // This is how the original code handles uncertain positions. We will reach + // here if the position is uncertain and we have the genome sequence. + let pos_g = mapper.n_to_g(pos_n)?; let edit_n = NaEdit::RefAlt { reference: "".to_string(), alternative: self.get_altered_sequence( @@ -245,7 +255,7 @@ impl Mapper { &var_g, )?, }; - (Mu::Certain((*pos_n.inner()).clone()), Mu::Certain(edit_n)) + (Mu::Certain((*pos_n).clone()), Mu::Certain(edit_n)) }; // the following is not needed? diff --git a/src/validator/mod.rs b/src/validator/mod.rs index 18c3228..4be8614 100644 --- a/src/validator/mod.rs +++ b/src/validator/mod.rs @@ -126,6 +126,7 @@ impl ExtrinsicValidator { add_gene_symbol: false, strict_bounds: true, renormalize_g: false, + genome_seq_available: true, }; Self { strict,