Skip to content

Commit

Permalink
fix: problem with annotating stop_retained insertions (#131) (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Oct 21, 2023
1 parent 548825f commit 0f42d20
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 35 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ seqrepo = { version = "0.8", features = ["cached"] }
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0"
indexmap = { version = "2.0.0", features = ["serde"] }
indexmap = { version = "2", features = ["serde"] }

[dev-dependencies]
anyhow = "1.0"
criterion = "0.5"
csv = "1.2"
env_logger = "0.10"
insta = { version = "1", features = ["yaml"] }
pretty_assertions = "1.3"
rstest = "0.18"
test-log = "0.2"
Expand Down
13 changes: 9 additions & 4 deletions src/mapper/altseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ impl RefTranscriptData {
}
}

#[derive(Debug, Clone)]
pub struct AltTranscriptData {
/// Transcript nucleotide sequence.
#[allow(dead_code)]
Expand Down Expand Up @@ -697,10 +698,14 @@ impl AltSeqToHgvsp {
.last()
.expect("should not happen; checked for being non-empty above")
+ 1;
(
format!("{}{}", deletion, &ref_sub[..max_diff]),
format!("{}{}", insertion, &alt_sub[..max_diff]),
)
if max_diff > ref_sub.len() || max_diff > alt_sub.len() {
(deletion.clone(), insertion.clone())
} else {
(
format!("{}{}", deletion, &ref_sub[..max_diff]),
format!("{}{}", insertion, &alt_sub[..max_diff]),
)
}
} else {
(deletion, insertion)
};
Expand Down
10 changes: 10 additions & 0 deletions src/mapper/snapshots/hgvs__mapper__variant__test__issue_131.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: src/mapper/variant.rs
expression: "&var_p_test"
---
ProtVariant:
accession:
value: NP_001240838.1
gene_symbol: ~
loc_edit: NoChange

13 changes: 13 additions & 0 deletions src/mapper/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,19 @@ mod test {

use super::{Config, Mapper};

// Regression test for issue #131: projecting the coding-sequence insertion
// `NM_001253909.2:c.416_417insGTG` to the protein level must succeed (any error
// is propagated via `?` and fails the test) and must render as the "no change"
// protein notation `NP_001240838.1:p.=`.
#[test]
fn issue_131() -> Result<(), Error> {
let mapper = build_mapper()?;

// Parse the c. variant from its HGVS string and map it to a p. variant.
let var_c = HgvsVariant::from_str("NM_001253909.2:c.416_417insGTG")?;
let var_p_test = mapper.c_to_p(&var_c, None)?;

// Check the human-readable rendering first, then pin the serialized structure
// with a YAML snapshot.
// NOTE(review): insta stores the expression text (`&var_p_test`) in the snapshot
// header, so renaming this binding may churn the committed `.snap` file -- confirm
// before refactoring.
assert_eq!(format!("{}", &var_p_test), "NP_001240838.1:p.=");
insta::assert_yaml_snapshot!(&var_p_test);

Ok(())
}

#[test]
fn test_sync() {
fn is_sync<T: Sync>() {}
Expand Down
52 changes: 26 additions & 26 deletions src/parser/ds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::parser::error::Error;
use log::warn;

/// Expression of "maybe uncertain".
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Mu<T> {
/// Certain variant of `T`.
Certain(T),
Expand Down Expand Up @@ -53,7 +53,7 @@ impl<T> Mu<T> {
}

/// Representation of gene symbol, e.g., `TTN` or `Ttn`.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GeneSymbol {
pub value: String,
}
Expand All @@ -79,7 +79,7 @@ impl Deref for GeneSymbol {
}

/// Edit of nucleic acids.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum NaEdit {
/// A substitution where both reference and alternative allele are nucleic acid strings
/// (or empty).
Expand Down Expand Up @@ -189,7 +189,7 @@ impl NaEdit {
}

/// Uncertain change through extension.
#[derive(Clone, Debug, PartialEq, Default)]
#[derive(Clone, Debug, PartialEq, Default, serde::Serialize, serde::Deserialize)]
pub enum UncertainLengthChange {
#[default]
None,
Expand All @@ -198,7 +198,7 @@ pub enum UncertainLengthChange {
}

/// Representation of accession, e.g., `NM_01234.5`.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Accession {
pub value: String,
}
Expand All @@ -224,7 +224,7 @@ impl Accession {
}

/// Protein edit with interval end edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ProteinEdit {
Fs {
alternative: Option<String>,
Expand Down Expand Up @@ -259,7 +259,7 @@ pub enum ProteinEdit {
}

/// A HGVS variant specification.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum HgvsVariant {
/// Variant specification with `c.` location.
CdsVariant {
Expand Down Expand Up @@ -563,7 +563,7 @@ impl HgvsVariant {
}

/// Coding sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsLocEdit {
/// Location on the CDS.
pub loc: Mu<CdsInterval>,
Expand Down Expand Up @@ -596,7 +596,7 @@ impl CdsLocEdit {
}

/// CDS position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsInterval {
/// Start position
pub start: CdsPos,
Expand Down Expand Up @@ -627,14 +627,14 @@ impl TryFrom<CdsInterval> for Range<i32> {

/// Specifies whether the CDS position is relative to the CDS start or
/// CDS end.
#[derive(Clone, Copy, Debug, PartialEq)]
#[derive(Clone, Copy, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum CdsFrom {
Start,
End,
}

/// CDS position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsPos {
/// Base position.
pub base: i32,
Expand All @@ -645,7 +645,7 @@ pub struct CdsPos {
}

/// Genome sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GenomeLocEdit {
/// Location on the genome.
pub loc: Mu<GenomeInterval>,
Expand Down Expand Up @@ -678,7 +678,7 @@ impl GenomeLocEdit {
}

/// Genome position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GenomeInterval {
/// Start position
pub start: Option<i32>,
Expand All @@ -701,7 +701,7 @@ impl TryInto<Range<i32>> for GenomeInterval {
}

/// Mitochondrial sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MtLocEdit {
/// Location on the mitochondrium.
pub loc: Mu<MtInterval>,
Expand Down Expand Up @@ -733,7 +733,7 @@ impl MtLocEdit {
}
}
/// Mitochondrial position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MtInterval {
/// Start position
pub start: Option<i32>,
Expand All @@ -756,7 +756,7 @@ impl TryInto<Range<i32>> for MtInterval {
}

/// Transcript sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxLocEdit {
/// Location on a transcript.
pub loc: Mu<TxInterval>,
Expand Down Expand Up @@ -789,7 +789,7 @@ impl TxLocEdit {
}

/// Transcript position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxInterval {
/// Start position
pub start: TxPos,
Expand All @@ -813,7 +813,7 @@ impl From<TxInterval> for Range<i32> {
}

/// Transcript position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxPos {
/// Base position.
pub base: i32,
Expand All @@ -822,7 +822,7 @@ pub struct TxPos {
}

/// RNA sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaLocEdit {
/// Location on a transcript.
pub loc: Mu<RnaInterval>,
Expand Down Expand Up @@ -854,7 +854,7 @@ impl RnaLocEdit {
}
}
/// RNA position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaInterval {
/// Start position
pub start: RnaPos,
Expand All @@ -878,7 +878,7 @@ impl From<RnaInterval> for Range<i32> {
}

/// RNA position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaPos {
/// Base position.
pub base: i32,
Expand All @@ -887,7 +887,7 @@ pub struct RnaPos {
}

/// Protein sequence location with edit or special.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ProtLocEdit {
Ordinary {
loc: Mu<ProtInterval>,
Expand All @@ -908,7 +908,7 @@ pub enum ProtLocEdit {
}

/// Protein position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ProtInterval {
/// Start position
pub start: ProtPos,
Expand All @@ -927,7 +927,7 @@ impl From<ProtInterval> for Range<i32> {
}

/// Protein position.
#[derive(Clone, Debug, PartialEq, Default)]
#[derive(Clone, Debug, PartialEq, Default, serde::Serialize, serde::Deserialize)]
pub struct ProtPos {
/// Amino acid value.
pub aa: String,
Expand Down Expand Up @@ -973,13 +973,13 @@ mod test {
assert_eq!(Mu::from(Some(1), false), Mu::Uncertain(Some(1)));
}

#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TestInterval {
pub start: TestPos,
pub end: TestPos,
}

#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TestPos {
pub base: i32,
pub offset: Option<i32>,
Expand Down
1 change: 1 addition & 0 deletions tests/data/data/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ ADGRL3
ADRA2B
ADRB2
AGBL5
AKR1C3
ALG9
AOAH
ASB18
Expand Down
4 changes: 2 additions & 2 deletions tests/data/data/uta_20210129-subset.pgd.gz
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/data/seqrepo_cache.fasta
Git LFS file not shown

0 comments on commit 0f42d20

Please sign in to comment.