Skip to content

Commit

Permalink
Add latest changes
Browse files: browse the repository at this point in the history
  • Loading branch information
jarbesfeld committed Dec 5, 2024
1 parent af28a0f commit fa54de7
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 40 deletions.
39 changes: 11 additions & 28 deletions schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,7 @@
"title": "Version"
},
"code": {
"allOf": [
{
"$ref": "#/$defs/Code"
}
],
"$ref": "#/$defs/Code",
"description": "A symbol uniquely identifying the concept, as in a syntax defined by the code system. CURIE format is preferred where possible (e.g. 'SO:0000704' is the CURIE form of the Sequence Ontology code for 'gene')."
}
},
Expand Down Expand Up @@ -412,19 +408,11 @@
"description": "A mapping to a concept in a terminology or code system.",
"properties": {
"coding": {
"allOf": [
{
"$ref": "#/$defs/Coding"
}
],
"$ref": "#/$defs/Coding",
"description": "A structured representation of a code for a defined concept in a terminology or code system."
},
"relation": {
"allOf": [
{
"$ref": "#/$defs/Relation"
}
],
"$ref": "#/$defs/Relation",
"description": "A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS)."
}
},
Expand All @@ -439,11 +427,7 @@
"description": "Representation of a variation by a specified nomenclature or syntax for a\nVariation object. Common examples of expressions for the description of molecular\nvariation include the HGVS and ISCN nomenclatures.",
"properties": {
"syntax": {
"allOf": [
{
"$ref": "#/$defs/Syntax"
}
],
"$ref": "#/$defs/Syntax",
"description": "The syntax used to describe the variation. The value should be one of the supported syntaxes."
},
"value": {
Expand Down Expand Up @@ -751,11 +735,7 @@
"title": "Mappings"
},
"sequence": {
"allOf": [
{
"$ref": "#/$defs/SequenceString"
}
],
"$ref": "#/$defs/SequenceString",
"description": "the literal sequence"
}
},
Expand Down Expand Up @@ -972,21 +952,24 @@
"pre_mapped": {
"anyOf": [
{
"$ref": "#/$defs/Allele"
"$ref": "#/$defs/CisPhasedBlock"
},
{
"$ref": "#/$defs/CisPhasedBlock"
"$ref": "#/$defs/Allele"
}
],
"title": "Pre Mapped"
},
"post_mapped": {
"anyOf": [
{
"$ref": "#/$defs/CisPhasedBlock"
},
{
"$ref": "#/$defs/Allele"
},
{
"$ref": "#/$defs/CisPhasedBlock"
"type": "null"
}
],
"title": "Post Mapped"
Expand Down
9 changes: 8 additions & 1 deletion src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,16 @@ def _get_blat_output(metadata: ScoresetMetadata, silent: bool) -> QueryResult:
"""
with tempfile.NamedTemporaryFile() as query_file:
query_file = _build_query_file(metadata, Path(query_file.name))
if len(metadata.target_sequence) > 25000:
msg = f"Target sequence for {metadata.urn} must have a length <= 25000 to run BLAT"
raise AlignmentError(msg)

if metadata.target_sequence_type == TargetSequenceType.PROTEIN:
target_args = "-q=prot -t=dnax"
elif metadata.target_gene_category == TargetType.PROTEIN_CODING:
elif (
metadata.target_gene_category == TargetType.PROTEIN_CODING
and len(metadata.target_sequence) <= 10000
):
target_args = "-q=dnax -t=dnax"
else:
target_args = ""
Expand Down
25 changes: 18 additions & 7 deletions src/dcd_mapping/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
CisPhasedBlock,
Expression,
LiteralSequenceExpression,
SequenceString,
)

from dcd_mapping.lookup import (
Expand Down Expand Up @@ -115,7 +116,7 @@ def _get_vrs_ref_allele_seq(
ref = sr.get_sequence(seq, start, end)
if ref is None:
raise ValueError
return Extension(name="vrs_ref_allele_seq", value=ref)
return SequenceString(root=ref)


def _get_hgvs_string(allele: Allele, accession: str) -> tuple[str, Syntax]:
Expand Down Expand Up @@ -208,9 +209,11 @@ def _annotate_allele_mapping(

# get vrs_ref_allele_seq for pre-mapped variants
pre_mapped.extensions = [
_get_vrs_ref_allele_seq(pre_mapped, metadata, tx_results),
_get_vrs_1_3_ext(pre_mapped),
]
pre_mapped.location.sequence = _get_vrs_ref_allele_seq(
pre_mapped, metadata, tx_results
)

# Determine reference sequence
if mapped_score.annotation_layer == AnnotationLayer.GENOMIC:
Expand All @@ -228,9 +231,10 @@ def _annotate_allele_mapping(
sr = get_seqrepo()
loc = mapped_score.post_mapped.location
sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}"
ref = sr.get_sequence(sequence_id, loc.start, loc.end)
post_mapped.location.sequence = SequenceString(
root=sr.get_sequence(sequence_id, loc.start, loc.end)
)
post_mapped.extensions = [
Extension(name="vrs_ref_allele_seq", value=ref),
_get_vrs_1_3_ext(post_mapped),
_get_va_digest(pre_mapped),
]
Expand All @@ -246,6 +250,10 @@ def _annotate_allele_mapping(
post_mapped if _is_valid_allele(pre_mapped, align_result) else None
)

# Remove extra digest attributes
pre_mapped.digest = None
post_mapped.digest = None

return ScoreAnnotationWithLayer(
pre_mapped=pre_mapped,
post_mapped=post_mapped,
Expand Down Expand Up @@ -280,9 +288,10 @@ def _annotate_cpb_mapping(
# get vrs_ref_allele_seq for pre-mapped variants
for allele in pre_mapped.members:
allele.extensions = [
_get_vrs_ref_allele_seq(allele, metadata, tx_results),
_get_vrs_1_3_ext(allele),
]
allele.location.sequence = _get_vrs_ref_allele_seq(allele, metadata, tx_results)
allele.digest = None
# Determine reference sequence
if mapping.annotation_layer == AnnotationLayer.GENOMIC:
sequence_id = (
Expand All @@ -305,9 +314,10 @@ def _annotate_cpb_mapping(
):
loc = post_mapped_allele.location
sequence_id = f"ga4gh:{loc.sequenceReference.refgetAccession}"
ref = sr.get_sequence(sequence_id, loc.start, loc.end)
post_mapped_allele.location.sequence = SequenceString(
root=sr.get_sequence(sequence_id, loc.start, loc.end)
)
post_mapped_allele.extensions = [
Extension(name="vrs_ref_allele_seq", value=ref),
_get_vrs_1_3_ext(post_mapped_allele),
_get_va_digest(pre_mapped_allele),
]
Expand All @@ -317,6 +327,7 @@ def _annotate_cpb_mapping(
pre_mapped_allele, align_result
):
valid_post_mapped_alleles.append(post_mapped_allele)
post_mapped_allele.digest = None
post_mapped.members = valid_post_mapped_alleles

pre_mapped.extensions = [
Expand Down
6 changes: 3 additions & 3 deletions src/dcd_mapping/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ class TargetSequenceType(str, Enum):
class TargetType(str, Enum):
"""Define target gene types."""

PROTEIN_CODING = "Protein coding"
REGULATORY = "Regulatory"
OTHER_NC = "Other noncoding"
PROTEIN_CODING = "protein_coding"
REGULATORY = "regulatory"
OTHER_NC = "other_noncoding"


class UniProtRef(BaseModel):
Expand Down
1 change: 0 additions & 1 deletion src/dcd_mapping/vrs_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,6 @@ def _get_variation(

# Run ga4gh_identify to assign VA digest
allele.id = ga4gh_identify(allele)
allele.digest = None
alleles.append(allele)

if not alleles:
Expand Down

0 comments on commit fa54de7

Please sign in to comment.