Skip to content

Commit

Permalink
feat: support for more annotations (#2041)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Oct 24, 2024
1 parent f15205c commit b93baee
Show file tree
Hide file tree
Showing 11 changed files with 517 additions and 136 deletions.
34 changes: 34 additions & 0 deletions backend/protos/seqvars/protos/output.proto
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,30 @@ message GeneIdentity {
string gene_symbol = 2;
}

// Type of a transcript: coding or non-coding.
enum TranscriptType {
  // Transcript type is unknown / was not specified (zero value).
  TRANSCRIPT_TYPE_UNSPECIFIED = 0;
  // Coding transcript.
  TRANSCRIPT_TYPE_CODING = 1;
  // Non-coding transcript.
  TRANSCRIPT_TYPE_NON_CODING = 2;
}

// Location where the variant falls in relation to a transcript.
enum VariantLocation {
  // Location is unknown / was not specified (zero value).
  VARIANT_LOCATION_UNSPECIFIED = 0;
  // Variant lies upstream of the gene.
  VARIANT_LOCATION_UPSTREAM = 1;
  // Variant lies within an exon.
  VARIANT_LOCATION_EXON = 2;
  // Variant lies within an intron.
  VARIANT_LOCATION_INTRON = 3;
  // Variant lies downstream of the gene.
  VARIANT_LOCATION_DOWNSTREAM = 4;
}

/// Gene-related consequences of a variant.
message GeneRelatedConsequences {
// HGVS. {c,n} code of variant.
Expand All @@ -164,6 +188,16 @@ message GeneRelatedConsequences {
optional string hgvs_p = 2;
// Predicted variant consequences.
repeated seqvars.pbs.query.Consequence consequences = 3;
// Transcript accession without version.
optional string tx_accession = 4;
// Transcript version.
optional int32 tx_version = 5;
// Location of the variant relative to the transcript (upstream, exon, intron, or downstream).
VariantLocation location = 6;
// Exon/intron number (1-based).
optional int32 rank_ord = 7;
// Exon/intron total count.
optional int32 rank_total = 8;
}

// Enumerations with modes of inheritance from HPO.
Expand Down
35 changes: 35 additions & 0 deletions backend/seqvars/factory_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,41 @@ def create_seqvarsquerypresetsclinvar(faker: Faker) -> list[SeqvarsQueryPresetsC
name="payload.variant_annotation.gene.consequences.hgvs_p",
description="HGVS description at protein level",
),
SeqvarsColumnConfigPydantic(
label="Transcript Accession",
name="payload.variant_annotation.gene.consequences.tx_accession",
description="Transcript accession without version",
),
SeqvarsColumnConfigPydantic(
label="Transcript Version",
name="payload.variant_annotation.gene.consequences.tx_version",
description="Transcript version",
),
SeqvarsColumnConfigPydantic(
label="Transcript",
name="__tx_accession_version__",
description="Transcript accession with version",
),
SeqvarsColumnConfigPydantic(
label="Variant Location",
name="payload.variant_annotation.gene.consequences.location",
description="Variant location with respect to gene",
),
SeqvarsColumnConfigPydantic(
label="Variant Location Rank No",
name="payload.variant_annotation.gene.consequences.rank_ord",
description="Number of exon/intron that variant is in",
),
SeqvarsColumnConfigPydantic(
label="Variant Location Rank Total",
name="payload.variant_annotation.gene.consequences.rank_total",
description="Total number of exons/introns in transcript",
),
SeqvarsColumnConfigPydantic(
label="Variant Location Rank No/Total",
name="__rank__",
description="Rank of exon/intron and total",
),
SeqvarsColumnConfigPydantic(
label="ClinGen HI",
name="__clingen_hi__",
Expand Down
23 changes: 23 additions & 0 deletions backend/seqvars/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2047,6 +2047,19 @@ class GeneIdentityPydantic(pydantic.BaseModel):
gene_symbol: str


class VariantLocationChoice(str, Enum):
    """Enumeration for the variant location.

    Members are ``str``-valued (the class derives from both ``str`` and
    ``Enum``), so each member compares equal to its lowercase string value.
    """

    #: Variant lies upstream of the gene.
    UPSTREAM = "upstream"
    #: Variant lies within an exon.
    EXON = "exon"
    #: Variant lies within an intron.
    INTRON = "intron"
    #: Variant lies downstream of the gene.
    DOWNSTREAM = "downstream"


class GeneRelatedConsequencesPydantic(pydantic.BaseModel):
"""Store gene-related consequences."""

Expand All @@ -2056,6 +2069,16 @@ class GeneRelatedConsequencesPydantic(pydantic.BaseModel):
hgvs_p: typing.Optional[str]
#: Predicted variant consequences.
consequences: list[SeqvarsVariantConsequenceChoice]
# Transcript accession without version.
tx_accession: typing.Optional[str]
# Transcript version.
tx_version: typing.Optional[int]
# Location of the variant relative to the transcript (upstream, exon, intron, or downstream).
location: VariantLocationChoice
# Exon/intron number (1-based).
rank_ord: typing.Optional[int]
# Exon/intron total count.
rank_total: typing.Optional[int]


class SeqvarsModeOfInheritance(str, Enum):
Expand Down
21 changes: 21 additions & 0 deletions backend/seqvars/models/protobufs.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
SeqvarsVariantTypeChoice,
SeqvarsVcfVariantPydantic,
ShetConstraintsPydantic,
VariantLocationChoice,
)
from seqvars.protos.output_pb2 import (
AggregateGermlineReviewStatus,
Expand Down Expand Up @@ -95,6 +96,7 @@
ScoreAnnotations,
ShetConstraints,
VariantAnnotation,
VariantLocation,
VariantRelatedAnnotation,
VariantScoreColumn,
VariantScoreColumnType,
Expand Down Expand Up @@ -761,6 +763,20 @@ def _gene_identity_from_protobuf(identity: GeneIdentity) -> GeneIdentityPydantic
)


#: Protobuf ``VariantLocation`` values and the ``VariantLocationChoice`` member
#: each one converts to.
#:
#: ``VARIANT_LOCATION_UNSPECIFIED`` has no entry here, so looking it up raises
#: ``KeyError``.
VARIANT_LOCATION_MAPPING: dict[VariantLocation.ValueType, VariantLocationChoice] = {
    VariantLocation.VARIANT_LOCATION_UPSTREAM: VariantLocationChoice.UPSTREAM,
    VariantLocation.VARIANT_LOCATION_EXON: VariantLocationChoice.EXON,
    VariantLocation.VARIANT_LOCATION_INTRON: VariantLocationChoice.INTRON,
    VariantLocation.VARIANT_LOCATION_DOWNSTREAM: VariantLocationChoice.DOWNSTREAM,
}


def _location_from_protobuf(
    location: VariantLocation.ValueType,
) -> VariantLocationChoice:
    """Convert a protobuf ``VariantLocation`` value to a ``VariantLocationChoice``.

    Args:
        location: Enum value as read from the protobuf message.

    Returns:
        The ``VariantLocationChoice`` member registered for ``location`` in
        ``VARIANT_LOCATION_MAPPING``.

    Raises:
        KeyError: If ``location`` has no entry in ``VARIANT_LOCATION_MAPPING``.
            This includes ``VARIANT_LOCATION_UNSPECIFIED``, which is not part
            of the mapping.
    """
    try:
        return VARIANT_LOCATION_MAPPING[location]
    except KeyError:
        # Keep the exception type callers may rely on, but replace the opaque
        # ``KeyError: 0`` with a message that names the offending value.
        raise KeyError(
            f"no VariantLocationChoice for protobuf VariantLocation value {location!r}"
        ) from None


def _consequences_from_protobuf(
consequences: GeneRelatedConsequences,
) -> GeneRelatedConsequencesPydantic:
Expand All @@ -771,6 +787,11 @@ def _consequences_from_protobuf(
_seqvars_variant_consequence_choice_from_protobuf(csq)
for csq in consequences.consequences
],
tx_accession=consequences.tx_accession if consequences.HasField("tx_accession") else None,
tx_version=consequences.tx_version if consequences.HasField("tx_version") else None,
location=_location_from_protobuf(consequences.location),
rank_ord=consequences.rank_ord if consequences.HasField("rank_ord") else None,
rank_total=consequences.rank_total if consequences.HasField("rank_total") else None,
)


Expand Down
100 changes: 52 additions & 48 deletions backend/seqvars/protos/output_pb2.py

Large diffs are not rendered by default.

142 changes: 140 additions & 2 deletions backend/seqvars/protos/output_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,68 @@ GENOME_RELEASE_GRCH38: GenomeRelease.ValueType # 2
"""GRCh38."""
global___GenomeRelease = GenomeRelease

# Base class of the ``TranscriptType`` enum stub; it carries the enum's value type.
# NOTE(review): this stub looks machine-generated from the .proto file -- hand
# edits may be lost on regeneration; confirm before relying on them.
class _TranscriptType:
    # Distinct ``int``-based type used to annotate values of this enum.
    ValueType = typing.NewType("ValueType", builtins.int)
    # Alias of ``ValueType``.
    V: typing_extensions.TypeAlias = ValueType

# Metaclass of the ``TranscriptType`` stub class; declares the descriptor and
# one attribute per enum value (the trailing ``# N`` is the numeric value).
class _TranscriptTypeEnumTypeWrapper(
    google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_TranscriptType.ValueType],
    builtins.type,
):  # noqa: F821
    DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
    TRANSCRIPT_TYPE_UNSPECIFIED: _TranscriptType.ValueType  # 0
    """Unknown transcript type."""
    TRANSCRIPT_TYPE_CODING: _TranscriptType.ValueType  # 1
    """Coding transcript."""
    TRANSCRIPT_TYPE_NON_CODING: _TranscriptType.ValueType  # 2
    """Non-coding transcript."""

# Stub class for the ``TranscriptType`` enum; it combines the value-type base
# class with the enum-wrapper metaclass declared for it.
class TranscriptType(_TranscriptType, metaclass=_TranscriptTypeEnumTypeWrapper):
    """Transcript type."""

# Module-level declarations of the enum values (the trailing ``# N`` is the
# numeric value).
TRANSCRIPT_TYPE_UNSPECIFIED: TranscriptType.ValueType  # 0
"""Unknown transcript type."""
TRANSCRIPT_TYPE_CODING: TranscriptType.ValueType  # 1
"""Coding transcript."""
TRANSCRIPT_TYPE_NON_CODING: TranscriptType.ValueType  # 2
"""Non-coding transcript."""
# Module-level alias of the enum class.
global___TranscriptType = TranscriptType

# Base class of the ``VariantLocation`` enum stub; it carries the enum's value type.
# NOTE(review): this stub looks machine-generated from the .proto file -- hand
# edits may be lost on regeneration; confirm before relying on them.
class _VariantLocation:
    # Distinct ``int``-based type used to annotate values of this enum.
    ValueType = typing.NewType("ValueType", builtins.int)
    # Alias of ``ValueType``.
    V: typing_extensions.TypeAlias = ValueType

# Metaclass of the ``VariantLocation`` stub class; declares the descriptor and
# one attribute per enum value (the trailing ``# N`` is the numeric value).
class _VariantLocationEnumTypeWrapper(
    google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_VariantLocation.ValueType],
    builtins.type,
):  # noqa: F821
    DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
    VARIANT_LOCATION_UNSPECIFIED: _VariantLocation.ValueType  # 0
    """Unspecified location."""
    VARIANT_LOCATION_UPSTREAM: _VariantLocation.ValueType  # 1
    """Upstream of gene."""
    VARIANT_LOCATION_EXON: _VariantLocation.ValueType  # 2
    """Exonic."""
    VARIANT_LOCATION_INTRON: _VariantLocation.ValueType  # 3
    """Intronic."""
    VARIANT_LOCATION_DOWNSTREAM: _VariantLocation.ValueType  # 4
    """Downstream of the gene."""

# Stub class for the ``VariantLocation`` enum; it combines the value-type base
# class with the enum-wrapper metaclass declared for it.
class VariantLocation(_VariantLocation, metaclass=_VariantLocationEnumTypeWrapper):
    """Location where the variant falls in relation to a transcript."""

# Module-level declarations of the enum values (the trailing ``# N`` is the
# numeric value).
VARIANT_LOCATION_UNSPECIFIED: VariantLocation.ValueType  # 0
"""Unspecified location."""
VARIANT_LOCATION_UPSTREAM: VariantLocation.ValueType  # 1
"""Upstream of gene."""
VARIANT_LOCATION_EXON: VariantLocation.ValueType  # 2
"""Exonic."""
VARIANT_LOCATION_INTRON: VariantLocation.ValueType  # 3
"""Intronic."""
VARIANT_LOCATION_DOWNSTREAM: VariantLocation.ValueType  # 4
"""Downstream of the gene."""
# Module-level alias of the enum class.
global___VariantLocation = VariantLocation

class _ModeOfInheritance:
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
Expand Down Expand Up @@ -880,6 +942,11 @@ class GeneRelatedConsequences(google.protobuf.message.Message):
HGVS_T_FIELD_NUMBER: builtins.int
HGVS_P_FIELD_NUMBER: builtins.int
CONSEQUENCES_FIELD_NUMBER: builtins.int
TX_ACCESSION_FIELD_NUMBER: builtins.int
TX_VERSION_FIELD_NUMBER: builtins.int
LOCATION_FIELD_NUMBER: builtins.int
RANK_ORD_FIELD_NUMBER: builtins.int
RANK_TOTAL_FIELD_NUMBER: builtins.int
hgvs_t: builtins.str
"""HGVS. {c,n} code of variant."""
hgvs_p: builtins.str
Expand All @@ -891,7 +958,16 @@ class GeneRelatedConsequences(google.protobuf.message.Message):
seqvars.protos.query_pb2.Consequence.ValueType
]:
"""Predicted variant consequences."""

tx_accession: builtins.str
"""Transcript accession without version."""
tx_version: builtins.int
"""Transcript version."""
location: global___VariantLocation.ValueType
"""Whether exon or intron is hit."""
rank_ord: builtins.int
"""Exon/intron number (1-based)."""
rank_total: builtins.int
"""Exon/intron total count."""
def __init__(
self,
*,
Expand All @@ -900,11 +976,39 @@ class GeneRelatedConsequences(google.protobuf.message.Message):
consequences: (
collections.abc.Iterable[seqvars.protos.query_pb2.Consequence.ValueType] | None
) = ...,
tx_accession: builtins.str | None = ...,
tx_version: builtins.int | None = ...,
location: global___VariantLocation.ValueType = ...,
rank_ord: builtins.int | None = ...,
rank_total: builtins.int | None = ...,
) -> None: ...
def HasField(
self,
field_name: typing_extensions.Literal[
"_hgvs_p", b"_hgvs_p", "_hgvs_t", b"_hgvs_t", "hgvs_p", b"hgvs_p", "hgvs_t", b"hgvs_t"
"_hgvs_p",
b"_hgvs_p",
"_hgvs_t",
b"_hgvs_t",
"_rank_ord",
b"_rank_ord",
"_rank_total",
b"_rank_total",
"_tx_accession",
b"_tx_accession",
"_tx_version",
b"_tx_version",
"hgvs_p",
b"hgvs_p",
"hgvs_t",
b"hgvs_t",
"rank_ord",
b"rank_ord",
"rank_total",
b"rank_total",
"tx_accession",
b"tx_accession",
"tx_version",
b"tx_version",
],
) -> builtins.bool: ...
def ClearField(
Expand All @@ -914,12 +1018,30 @@ class GeneRelatedConsequences(google.protobuf.message.Message):
b"_hgvs_p",
"_hgvs_t",
b"_hgvs_t",
"_rank_ord",
b"_rank_ord",
"_rank_total",
b"_rank_total",
"_tx_accession",
b"_tx_accession",
"_tx_version",
b"_tx_version",
"consequences",
b"consequences",
"hgvs_p",
b"hgvs_p",
"hgvs_t",
b"hgvs_t",
"location",
b"location",
"rank_ord",
b"rank_ord",
"rank_total",
b"rank_total",
"tx_accession",
b"tx_accession",
"tx_version",
b"tx_version",
],
) -> None: ...
@typing.overload
Expand All @@ -930,6 +1052,22 @@ class GeneRelatedConsequences(google.protobuf.message.Message):
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_hgvs_t", b"_hgvs_t"]
) -> typing_extensions.Literal["hgvs_t"] | None: ...
@typing.overload
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_rank_ord", b"_rank_ord"]
) -> typing_extensions.Literal["rank_ord"] | None: ...
@typing.overload
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_rank_total", b"_rank_total"]
) -> typing_extensions.Literal["rank_total"] | None: ...
@typing.overload
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_tx_accession", b"_tx_accession"]
) -> typing_extensions.Literal["tx_accession"] | None: ...
@typing.overload
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_tx_version", b"_tx_version"]
) -> typing_extensions.Literal["tx_version"] | None: ...

global___GeneRelatedConsequences = GeneRelatedConsequences

Expand Down
Loading

0 comments on commit b93baee

Please sign in to comment.