Skip to content

Commit

Permalink
feat: allow configuring that there is no genome sequence (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Mar 30, 2023
1 parent ae94b0a commit cd5b7bb
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
5 changes: 5 additions & 0 deletions src/mapper/assembly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ pub struct Config {
/// Re-normalize out of bounds genome variants on minus strand. This can be
/// switched off so genome sequence does not have to be available in provider.
pub renormalize_g: bool,
/// Use the genome sequence in case of uncertain g-to-n projections. This
/// can be switched off so genome sequence does not have to be available.
pub genome_seq_available: bool,
}

impl Default for Config {
Expand All @@ -56,6 +59,7 @@ impl Default for Config {
strict_bounds: true,
add_gene_symbol: false,
renormalize_g: true,
genome_seq_available: true,
}
}
}
Expand Down Expand Up @@ -103,6 +107,7 @@ impl Mapper {
add_gene_symbol: config.add_gene_symbol,
strict_bounds: config.strict_bounds,
renormalize_g: config.renormalize_g,
genome_seq_available: config.genome_seq_available,
};
let inner = VariantMapper::new(&inner_config, provider.clone());
let asm_accessions = provider
Expand Down
16 changes: 13 additions & 3 deletions src/mapper/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ pub struct Config {
/// Re-normalize out of bounds genome variants on minus strand. This can be
/// switched off so genome sequence does not have to be available in provider.
pub renormalize_g: bool,
/// Use the genome sequence in case of uncertain g-to-n projections. This
/// can be switched off so genome sequence does not have to be available.
pub genome_seq_available: bool,
}

impl Default for Config {
Expand All @@ -41,6 +44,7 @@ impl Default for Config {
add_gene_symbol: false,
strict_bounds: true,
renormalize_g: true,
genome_seq_available: true,
}
}
}
Expand Down Expand Up @@ -206,7 +210,11 @@ impl Mapper {
pos_n.inner(),
loc_edit.loc.is_certain() && pos_n.is_certain(),
);
let (pos_n, edit_n) = if let Mu::Certain(pos_n) = pos_n {
// The original Python code falls back to the genome for uncertain positions. This
// cannot be done if we do not have the original genome sequence.
let pos_n_certain = pos_n.is_certain();
let pos_n = pos_n.inner();
let (pos_n, edit_n) = if pos_n_certain || !self.config.genome_seq_available {
let edit_n = self.convert_edit_check_strand(mapper.strand, &loc_edit.edit)?;
if let NaEdit::Ins { alternative } = edit_n.inner() {
if pos_n.start.offset.is_none()
Expand Down Expand Up @@ -236,7 +244,9 @@ impl Mapper {
(Mu::Certain((*pos_n).clone()), edit_n)
}
} else {
let pos_g = mapper.n_to_g(pos_n.inner())?;
// This is how the original code handles uncertain positions. We will reach
// here if the position is uncertain and we have the genome sequence.
let pos_g = mapper.n_to_g(pos_n)?;
let edit_n = NaEdit::RefAlt {
reference: "".to_string(),
alternative: self.get_altered_sequence(
Expand All @@ -245,7 +255,7 @@ impl Mapper {
&var_g,
)?,
};
(Mu::Certain((*pos_n.inner()).clone()), Mu::Certain(edit_n))
(Mu::Certain((*pos_n).clone()), Mu::Certain(edit_n))
};

// the following is not needed?
Expand Down
1 change: 1 addition & 0 deletions src/validator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ impl ExtrinsicValidator {
add_gene_symbol: false,
strict_bounds: true,
renormalize_g: false,
genome_seq_available: true,
};
Self {
strict,
Expand Down

0 comments on commit cd5b7bb

Please sign in to comment.