From b612ae3d166c4201fb7502bf5033a7c7bef577f9 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 11 Nov 2024 07:18:03 +0100 Subject: [PATCH 1/3] feat: provide genes/transcripts endpoint with openapi (#605) --- codecov.yml | 31 +++ src/server/run/actix_server/gene_txs.rs | 319 ++++++++++++++++++++++-- src/server/run/actix_server/mod.rs | 1 + src/server/run/actix_server/versions.rs | 25 +- 4 files changed, 360 insertions(+), 16 deletions(-) diff --git a/codecov.yml b/codecov.yml index 6a2971b7..72ba1080 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,2 +1,33 @@ +# For more configuration details: +# https://docs.codecov.io/docs/codecov-yaml + +# Check if this file is valid by running in bash: +# curl -X POST --data-binary @.codecov.yml https://codecov.io/validate + +# Codecov configuration +# --------------------- +codecov: + +# Coverage configuration +# ---------------------- +coverage: + status: + patch: false + + range: 70..90 # First number represents red, and second represents green + # (default is 70..100) + round: down # up, down, or nearest + precision: 0 # Number of decimal places, between 0 and 5 + +# Ignoring Paths +# -------------- +# which folders/files to ignore ignore: - "misc/*.py" + +# Pull request comments: +# ---------------------- +# Diff is the Coverage Diff of the pull request. +# Files are the files impacted by the pull request +comment: + layout: diff, files # accepted in any order: reach, diff, flags, and/or files diff --git a/src/server/run/actix_server/gene_txs.rs b/src/server/run/actix_server/gene_txs.rs index bc3e12f2..0d1ca01e 100644 --- a/src/server/run/actix_server/gene_txs.rs +++ b/src/server/run/actix_server/gene_txs.rs @@ -1,39 +1,44 @@ //! Implementation of `/seqvars/csq` endpoint. 
use crate::common::GenomeRelease; +use crate::pbs; use crate::pbs::server::{GeneTranscriptsQuery, GeneTranscriptsResponse}; use crate::pbs::txs::GenomeBuild; +use crate::server::run::actix_server::CustomError; use actix_web::{ get, web::{self, Data, Json, Path}, }; use hgvs::data::interface::Provider as _; +use super::versions::Assembly; + /// Maximal page size. static PAGE_SIZE_MAX: i32 = 1000; /// Default page size. static PAGE_SIZE_DEFAULT: i32 = 100; -#[allow(clippy::unused_async)] -#[get("/genes/txs")] -async fn handle( +/// Core implementation of the `/genes/txs` and `/api/v1/genes/transcripts` endpoints. +/// +/// For now takes the `GeneTranscriptsQuery` as the argument and returns +/// the `GeneTranscriptsResponse` as the result. +fn genes_tx_impl( data: Data, - _path: Path<()>, - query: web::Query, -) -> actix_web::Result, super::CustomError> { + query: GeneTranscriptsQuery, +) -> Result { let GeneTranscriptsQuery { genome_build, hgnc_id, page_size, next_page_token, - } = query.clone().into_inner(); + } = query; let genome_build = GenomeBuild::try_from(genome_build.unwrap_or(GenomeBuild::Grch37 as i32)) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; + .map_err(|e| CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; let genome_release = GenomeRelease::try_from(genome_build) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; + .map_err(|e| CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; let hgnc_id = hgnc_id .as_ref() - .ok_or_else(|| super::CustomError::new(anyhow::anyhow!("No HGNC ID provided.")))?; + .ok_or_else(|| CustomError::new(anyhow::anyhow!("No HGNC ID provided.")))?; let page_size = page_size .unwrap_or(PAGE_SIZE_DEFAULT) .min(PAGE_SIZE_MAX) @@ -42,10 +47,10 @@ async fn handle( let provider = data .provider .get(&genome_release) - .ok_or_else(|| super::CustomError::new(anyhow::anyhow!("No provider available.")))?; + .ok_or_else(|| 
CustomError::new(anyhow::anyhow!("No provider available.")))?; let tx_acs = provider .get_tx_for_gene(hgnc_id) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("No transcripts found: {}", e)))? + .map_err(|e| CustomError::new(anyhow::anyhow!("No transcripts found: {}", e)))? .into_iter() .map(|tx| tx.tx_ac) .collect::>(); @@ -55,7 +60,7 @@ async fn handle( .unwrap_or(0); let last = (first + page_size as usize).min(tx_acs.len()); - Ok(Json(GeneTranscriptsResponse { + Ok(GeneTranscriptsResponse { transcripts: tx_acs[first..last] .iter() .filter_map(|tx_ac| provider.get_tx(tx_ac)) @@ -65,5 +70,291 @@ async fn handle( } else { None }, - })) + }) +} + +/// Implementation of the `/genes/txs` endpoint. +#[allow(clippy::unused_async)] +#[get("/genes/txs")] +async fn handle( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, CustomError> { + Ok(Json(genes_tx_impl(data, query.into_inner())?)) +} + +/// Query arguments for the `/api/v1/genes/transcripts` endpoint. +#[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)] +struct GenesTranscriptsListQuery { + /// HGNC gene ID. + pub hgnc_id: String, + /// Genome build. + pub genome_build: Assembly, + /// Page size. + pub page_size: Option, + /// Next page token. + pub next_page_token: Option, +} + +impl From for pbs::server::GeneTranscriptsQuery { + fn from(val: GenesTranscriptsListQuery) -> Self { + pbs::server::GeneTranscriptsQuery { + genome_build: Some(Into::::into(val.genome_build) as i32), + hgnc_id: Some(val.hgnc_id), + page_size: val.page_size, + next_page_token: val.next_page_token, + } + } +} + +/// Enumeration for `Transcript::biotype`. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum TranscriptBiotype { + /// Coding transcript. + Coding, + /// Non-coding transcript. 
+ NonCoding, +} + +impl TryFrom for TranscriptBiotype { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::TranscriptBiotype) -> Result { + match value { + pbs::txs::TranscriptBiotype::Coding => Ok(TranscriptBiotype::Coding), + pbs::txs::TranscriptBiotype::NonCoding => Ok(TranscriptBiotype::NonCoding), + _ => Err(anyhow::anyhow!("Invalid biotype: {:?}", value)), + } + } +} + +// Bit values for the transcript tags. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum TranscriptTag { + /// Member of Ensembl basic. + Basic, + /// Member of Ensembl canonical. + EnsemblCanonical, + /// Member of MANE Select. + ManeSelect, + /// Member of MANE Plus Clinical. + ManePlusClinical, + /// Member of RefSeq Select. + RefSeqSelect, + /// Flagged as being a selenoprotein (UGA => selenon). + Selenoprotein, + /// Member of GENCODE Primary + GencodePrimary, + /// Catchall for other tags. + Other, +} + +impl TryFrom for TranscriptTag { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::TranscriptTag) -> Result { + match value { + pbs::txs::TranscriptTag::Basic => Ok(TranscriptTag::Basic), + pbs::txs::TranscriptTag::EnsemblCanonical => Ok(TranscriptTag::EnsemblCanonical), + pbs::txs::TranscriptTag::ManeSelect => Ok(TranscriptTag::ManeSelect), + pbs::txs::TranscriptTag::ManePlusClinical => Ok(TranscriptTag::ManePlusClinical), + pbs::txs::TranscriptTag::RefSeqSelect => Ok(TranscriptTag::RefSeqSelect), + pbs::txs::TranscriptTag::Selenoprotein => Ok(TranscriptTag::Selenoprotein), + pbs::txs::TranscriptTag::GencodePrimary => Ok(TranscriptTag::GencodePrimary), + pbs::txs::TranscriptTag::Other => Ok(TranscriptTag::Other), + _ => Err(anyhow::anyhow!("Invalid transcript tag: {:?}", value)), + } + } +} + +/// Enumeration for the two strands of the genome. 
+#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum Strand { + /// unknown + Unknown, + /// Forward / plus + Plus, + /// Reverse / minus + Minus, +} + +impl From for Strand { + fn from(value: pbs::txs::Strand) -> Self { + match value { + pbs::txs::Strand::Unknown => Strand::Unknown, + pbs::txs::Strand::Plus => Strand::Plus, + pbs::txs::Strand::Minus => Strand::Minus, + } + } +} + +/// Store the alignment of one exon to the reference. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct ExonAlignment { + /// Start position on reference. + pub alt_start_i: i32, + /// End position on reference. + pub alt_end_i: i32, + /// Exon number. + pub ord: i32, + /// CDS start coordinate. + pub alt_cds_start_i: Option, + /// CDS end coordinate. + pub alt_cds_end_i: Option, + /// CIGAR string of alignment, empty indicates full matches. + pub cigar: String, +} + +impl From for ExonAlignment { + fn from(value: pbs::txs::ExonAlignment) -> Self { + ExonAlignment { + alt_start_i: value.alt_start_i, + alt_end_i: value.alt_end_i, + ord: value.ord, + alt_cds_start_i: value.alt_cds_start_i, + alt_cds_end_i: value.alt_cds_end_i, + cigar: value.cigar.clone(), + } + } +} + +/// Store information about a transcript aligning to a genome. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct GenomeAlignment { + /// The genome build identifier. + pub genome_build: Assembly, + /// Accession of the contig sequence. + pub contig: String, + /// CDS start position, `-1` to indicate `None`. + pub cds_start: Option, + /// CDS end position, `-1` to indicate `None`. + pub cds_end: Option, + /// The strand. + pub strand: Strand, + /// Exons of the alignment. 
+ pub exons: Vec, +} + +impl TryFrom for GenomeAlignment { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::GenomeAlignment) -> Result { + Ok(GenomeAlignment { + genome_build: Assembly::try_from(pbs::txs::GenomeBuild::try_from(value.genome_build)?)?, + contig: value.contig.clone(), + cds_start: value.cds_start, + cds_end: value.cds_end, + strand: Strand::from(pbs::txs::Strand::try_from(value.strand)?), + exons: value.exons.into_iter().map(Into::into).collect(), + }) + } +} + +/// Transcript information. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct Transcript { + /// Transcript accession with version, e.g., `"NM_007294.3"` or `"ENST00000461574.1"` for BRCA1. + pub id: String, + /// HGNC symbol, e.g., `"BRCA1"` + pub gene_symbol: String, + /// HGNC gene identifier, e.g., `"1100"` for BRCA1. + pub gene_id: String, + /// Transcript biotype. + pub biotype: TranscriptBiotype, + /// Transcript flags. + pub tags: Vec, + /// Identifier of the corresponding protein. + pub protein: Option, + /// CDS start codon. + pub start_codon: Option, + /// CDS stop codon. + pub stop_codon: Option, + /// Alignments on the different genome builds. + pub genome_alignments: Vec, + /// Whether this transcript has an issue (e.g. MissingStopCodon), cf. `mehari::db::create::mod::Reason`. + pub filtered: Option, + /// Reason for filtering. + pub filter_reason: ::core::option::Option, +} + +impl TryFrom for Transcript { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::Transcript) -> Result { + Ok(Transcript { + id: value.id.clone(), + gene_symbol: value.gene_symbol.clone(), + gene_id: value.gene_id.clone(), + biotype: TranscriptBiotype::try_from(pbs::txs::TranscriptBiotype::try_from( + value.biotype, + )?)?, + tags: value + .tags + .into_iter() + .map(|i32_tag| -> Result<_, anyhow::Error> { + TranscriptTag::try_from(pbs::txs::TranscriptTag::try_from(i32_tag)?) 
+ }) + .collect::, _>>()?, + protein: value.protein.clone(), + start_codon: value.start_codon, + stop_codon: value.stop_codon, + genome_alignments: value + .genome_alignments + .into_iter() + .map(TryInto::try_into) + .collect::>()?, + filtered: value.filtered, + filter_reason: value.filter_reason, + }) + } +} + +/// Response of the `/api/v1/genes/transcripts` endpoint. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct GenesTranscriptsListResponse { + /// The transcripts for the gene. + pub transcripts: Vec, + /// The token to continue from a previous query. + pub next_page_token: Option, +} + +impl TryFrom for GenesTranscriptsListResponse { + type Error = anyhow::Error; + + fn try_from(value: pbs::server::GeneTranscriptsResponse) -> Result { + Ok(GenesTranscriptsListResponse { + transcripts: value + .transcripts + .into_iter() + .map(TryInto::try_into) + .collect::>()?, + next_page_token: value.next_page_token, + }) + } +} + +/// Query for consequence of a variant. +#[allow(clippy::unused_async)] +#[utoipa::path( + get, + operation_id = "genesTranscriptsList", + responses( + (status = 200, description = "Transcripts for the selected gene.", body = GenesTranscriptsListResponse), + (status = 500, description = "Internal server error.", body = CustomError) + ) +)] +#[get("/api/v1/genes/transcripts")] +async fn handle_with_openapi( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, CustomError> { + let result = genes_tx_impl(data, query.into_inner().into())?; + Ok(Json(result.try_into().map_err(|e| { + CustomError::new(anyhow::anyhow!("Conversion error: {}", e)) + })?)) } diff --git a/src/server/run/actix_server/mod.rs b/src/server/run/actix_server/mod.rs index 9885e834..1511a144 100644 --- a/src/server/run/actix_server/mod.rs +++ b/src/server/run/actix_server/mod.rs @@ -59,6 +59,7 @@ pub async fn main( actix_web::App::new() .app_data(data.clone()) .service(gene_txs::handle) + .service(gene_txs::handle_with_openapi) 
.service(seqvars_csq::handle) .service(strucvars_csq::handle) .service(versions::handle) diff --git a/src/server/run/actix_server/versions.rs b/src/server/run/actix_server/versions.rs index bd08138d..82c0e5e2 100644 --- a/src/server/run/actix_server/versions.rs +++ b/src/server/run/actix_server/versions.rs @@ -3,7 +3,7 @@ use actix_web::{ web::{self, Data, Json, Path}, }; -use crate::annotate::seqvars::provider::Provider; +use crate::{annotate::seqvars::provider::Provider, pbs}; use super::CustomError; @@ -41,6 +41,27 @@ impl From for Assembly { } } +impl From for pbs::txs::GenomeBuild { + fn from(val: Assembly) -> Self { + match val { + Assembly::Grch37 => pbs::txs::GenomeBuild::Grch37, + Assembly::Grch38 => pbs::txs::GenomeBuild::Grch38, + } + } +} + +impl TryFrom for Assembly { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::GenomeBuild) -> Result { + match value { + pbs::txs::GenomeBuild::Grch37 => Ok(Self::Grch37), + pbs::txs::GenomeBuild::Grch38 => Ok(Self::Grch38), + _ => Err(anyhow::anyhow!("Unsupported assembly")), + } + } +} + /// Software version specification. #[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize, utoipa::ToSchema)] pub struct SoftwareVersions { @@ -87,7 +108,7 @@ impl DataVersionEntry { } } -/// Response of the `/v1/version` endpoint. +/// Response of the `/api/v1/version` endpoint. #[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize, utoipa::ToSchema)] pub struct VersionsInfoResponse { /// Software versions specification. From 80902db4437def02c364dafc48b2d645b23cd56e Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 11 Nov 2024 07:38:20 +0100 Subject: [PATCH 2/3] wip --- openapi.schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.schema.yaml b/openapi.schema.yaml index 6d7eea41..b833d1ec 100644 --- a/openapi.schema.yaml +++ b/openapi.schema.yaml @@ -77,7 +77,7 @@ components: description: Version of the `hgvs` crate. 
VersionsInfoResponse: type: object - description: Response of the `/v1/version` endpoint. + description: Response of the `/api/v1/version` endpoint. required: - software - data From c89ff6fad44f4cde8334d2f578bbffe8e474eb08 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 11 Nov 2024 07:57:34 +0100 Subject: [PATCH 3/3] feat: /api/v1/strucvars/csq endpoint with OpenAPI (#607) --- openapi.schema.yaml | 136 +++++++++++++++++++ src/annotate/strucvars/csq.rs | 124 ++++++++++------- src/common/mod.rs | 1 + src/server/run/actix_server/gene_txs.rs | 8 +- src/server/run/actix_server/mod.rs | 1 + src/server/run/actix_server/strucvars_csq.rs | 131 ++++++++++++++++-- src/server/run/mod.rs | 20 ++- 7 files changed, 358 insertions(+), 63 deletions(-) diff --git a/openapi.schema.yaml b/openapi.schema.yaml index b833d1ec..f07a5785 100644 --- a/openapi.schema.yaml +++ b/openapi.schema.yaml @@ -9,6 +9,59 @@ info: name: MIT version: 0.29.6 paths: + /api/v1/strucvars/csq: + get: + tags: + - strucvars_csq + summary: Query for consequence of a variant. + operationId: strucvarsCsq + parameters: + - name: genome_release + in: query + description: The assembly. + required: true + schema: + $ref: '#/components/schemas/GenomeRelease' + - name: chromosome + in: query + description: Chromosome. + required: true + schema: + type: string + - name: start + in: query + description: 1-based start position. + required: true + schema: + type: integer + format: int32 + - name: stop + in: query + description: 1-based stop position, ignored for INS. + required: false + schema: + type: integer + format: int32 + nullable: true + - name: sv_type + in: query + description: The variant type to use for annotation. + required: true + schema: + $ref: '#/components/schemas/StrucvarsSvType' + responses: + '200': + description: Strucvars consequence information. + content: + application/json: + schema: + $ref: '#/components/schemas/StrucvarsCsqResponse' + '500': + description: Internal server error. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/CustomError' /api/v1/versionsInfo: get: tags: @@ -62,6 +115,12 @@ components: type: string description: Version of the Ensembl database, if any. nullable: true + GenomeRelease: + type: string + description: Select the genome release to use. + enum: + - grch37 + - grch38 SoftwareVersions: type: object description: Software version specification. @@ -75,6 +134,83 @@ components: hgvs_rs: type: string description: Version of the `hgvs` crate. + StrucvarsCsqQuery: + type: object + description: Query parameters of the `/api/v1/strucvars/csq` endpoint. + required: + - genome_release + - chromosome + - start + - sv_type + properties: + genome_release: + $ref: '#/components/schemas/GenomeRelease' + chromosome: + type: string + description: Chromosome. + start: + type: integer + format: int32 + description: 1-based start position. + stop: + type: integer + format: int32 + description: 1-based stop position, ignored for INS. + nullable: true + sv_type: + $ref: '#/components/schemas/StrucvarsSvType' + StrucvarsCsqResponse: + type: object + description: Response of the `/api/v1/strucvars/csq` endpoint. + required: + - version + - query + - result + properties: + version: + $ref: '#/components/schemas/VersionsInfoResponse' + query: + $ref: '#/components/schemas/StrucvarsCsqQuery' + result: + type: array + items: + $ref: '#/components/schemas/StrucvarsGeneTranscriptEffects' + description: The resulting records for the affected genes. + StrucvarsGeneTranscriptEffects: + type: object + description: Explanation of transcript effect per individual gene. + required: + - hgnc_id + - transcript_effects + properties: + hgnc_id: + type: string + description: HGNC identifier + transcript_effects: + type: array + items: + $ref: '#/components/schemas/StrucvarsTranscriptEffect' + description: Transcript effects for the gene. + StrucvarsSvType: + type: string + description: Structural Variant type. 
+ enum: + - DEL + - DUP + - INS + - INV + - BND + StrucvarsTranscriptEffect: + type: string + description: Enumeration for effect on transcript. + enum: + - transcript_variant + - exon_variant + - splice_region_variant + - intron_variant + - upstream_variant + - downstream_variant + - intergenic_variant VersionsInfoResponse: type: object description: Response of the `/api/v1/version` endpoint. diff --git a/src/annotate/strucvars/csq.rs b/src/annotate/strucvars/csq.rs index 9923eb76..a61ecd5f 100644 --- a/src/annotate/strucvars/csq.rs +++ b/src/annotate/strucvars/csq.rs @@ -12,10 +12,20 @@ use crate::{ /// Enumeration for effect on transcript. #[derive( - serde::Serialize, serde::Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, + serde::Serialize, + serde::Deserialize, + PartialEq, + Eq, + PartialOrd, + Ord, + Debug, + Clone, + Copy, + utoipa::ToSchema, )] #[serde(rename_all = "snake_case")] -pub enum TranscriptEffect { +// #[schema(as=StrucvarsTranscriptEffect)] // TODO: rename back to TranscriptEffect once utoipa's as= is fixed. +pub enum StrucvarsTranscriptEffect { /// Affects the full transcript. TranscriptVariant, /// An exon is affected by the SV. @@ -32,17 +42,17 @@ pub enum TranscriptEffect { IntergenicVariant, } -impl TranscriptEffect { +impl StrucvarsTranscriptEffect { /// Return vector with all transcript effects. 
- pub fn vec_all() -> Vec { + pub fn vec_all() -> Vec { vec![ - TranscriptEffect::TranscriptVariant, - TranscriptEffect::ExonVariant, - TranscriptEffect::SpliceRegionVariant, - TranscriptEffect::IntronVariant, - TranscriptEffect::UpstreamVariant, - TranscriptEffect::DownstreamVariant, - TranscriptEffect::IntergenicVariant, + StrucvarsTranscriptEffect::TranscriptVariant, + StrucvarsTranscriptEffect::ExonVariant, + StrucvarsTranscriptEffect::SpliceRegionVariant, + StrucvarsTranscriptEffect::IntronVariant, + StrucvarsTranscriptEffect::UpstreamVariant, + StrucvarsTranscriptEffect::DownstreamVariant, + StrucvarsTranscriptEffect::IntergenicVariant, ] } } @@ -51,9 +61,20 @@ impl TranscriptEffect { pub mod interface { /// Structural Variant type. #[derive( - serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, + serde::Serialize, + serde::Deserialize, + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + utoipa::ToSchema, )] - pub enum StrucVarType { + // #[schema(as=StrucvarsSvType)] // TODO: rename back to StrucVarType once utoipa's as= is fixed. + pub enum StrucvarsSvType { #[serde(rename = "DEL")] Del, #[serde(rename = "DUP")] @@ -109,7 +130,7 @@ pub mod interface { fn stop(&self) -> i32; /// Type of the structural variant - fn sv_type(&self) -> StrucVarType; + fn sv_type(&self) -> StrucvarsSvType; /// The strand orientation of the structural variant, if applicable. fn strand_orientation(&self) -> StrandOrientation; } @@ -125,16 +146,17 @@ struct TxRegion { // "arbitrary" number no: usize, // effect of the transcript (encodes region type) - effect: TranscriptEffect, + effect: StrucvarsTranscriptEffect, } /// Explanation of transcript effect per individual gene. 
-#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] -pub struct GeneTranscriptEffects { +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, utoipa::ToSchema)] +// #[schema(as=StrucvarsGeneTranscriptEffects)] // TODO: rename back to GeneTranscriptEffects once utoipa's as= is fixed. +pub struct StrucvarsGeneTranscriptEffects { /// HGNC identifier hgnc_id: String, /// Transcript effects for the gene. - transcript_effects: Vec, + transcript_effects: Vec, } /// Length of the upstream/downstream region. @@ -188,9 +210,9 @@ fn tx_regions(tx: &Transcript) -> Vec { end: exon_alignment.alt_start_i - 1, no, effect: if genome_alignment.strand == Strand::Plus as i32 { - TranscriptEffect::UpstreamVariant + StrucvarsTranscriptEffect::UpstreamVariant } else { - TranscriptEffect::DownstreamVariant + StrucvarsTranscriptEffect::DownstreamVariant }, }); } else { @@ -199,7 +221,7 @@ fn tx_regions(tx: &Transcript) -> Vec { begin: (exon_alignment.alt_start_i - 1) - 8, end: (exon_alignment.alt_start_i - 1) + 3, no, - effect: TranscriptEffect::SpliceRegionVariant, + effect: StrucvarsTranscriptEffect::SpliceRegionVariant, }) } @@ -210,9 +232,9 @@ fn tx_regions(tx: &Transcript) -> Vec { end: exon_alignment.alt_end_i + X_STREAM, no, effect: if genome_alignment.strand == Strand::Plus as i32 { - TranscriptEffect::DownstreamVariant + StrucvarsTranscriptEffect::DownstreamVariant } else { - TranscriptEffect::UpstreamVariant + StrucvarsTranscriptEffect::UpstreamVariant }, }); } else { @@ -221,7 +243,7 @@ fn tx_regions(tx: &Transcript) -> Vec { begin: exon_alignment.alt_end_i - 3, end: exon_alignment.alt_end_i + 8, no, - effect: TranscriptEffect::SpliceRegionVariant, + effect: StrucvarsTranscriptEffect::SpliceRegionVariant, }) } @@ -230,7 +252,7 @@ fn tx_regions(tx: &Transcript) -> Vec { begin: exon_alignment.alt_start_i - 1, end: exon_alignment.alt_end_i, no, - effect: TranscriptEffect::ExonVariant, + effect: StrucvarsTranscriptEffect::ExonVariant, }); if 
exon_alignment.alt_start_i != tx_start { @@ -239,7 +261,7 @@ fn tx_regions(tx: &Transcript) -> Vec { begin: prev_alt_end_i, end: exon_alignment.alt_start_i - 1, no, - effect: TranscriptEffect::IntronVariant, + effect: StrucvarsTranscriptEffect::IntronVariant, }); } @@ -251,7 +273,7 @@ fn tx_regions(tx: &Transcript) -> Vec { } /// Return the transcript region / effect for the given breakpoint. -fn gene_tx_effects_for_bp(tx: &Transcript, pos: i32) -> Vec { +fn gene_tx_effects_for_bp(tx: &Transcript, pos: i32) -> Vec { // Obtain list of regions for transcript. let regions = tx_regions(tx); @@ -263,7 +285,7 @@ fn gene_tx_effects_for_bp(tx: &Transcript, pos: i32) -> Vec { .map(|r| r.effect) .collect::>(); if result.is_empty() { - result.push(TranscriptEffect::IntergenicVariant); + result.push(StrucvarsTranscriptEffect::IntergenicVariant); } else { result.sort(); result.dedup(); @@ -272,7 +294,11 @@ fn gene_tx_effects_for_bp(tx: &Transcript, pos: i32) -> Vec { } /// Return the transcript region / effect for the given range. -fn gene_tx_effect_for_range(tx: &Transcript, start: i32, stop: i32) -> Vec { +fn gene_tx_effect_for_range( + tx: &Transcript, + start: i32, + stop: i32, +) -> Vec { // Obtain list of regions for transcript. let regions = tx_regions(tx); @@ -289,10 +315,10 @@ fn gene_tx_effect_for_range(tx: &Transcript, start: i32, stop: i32) -> Vec, -) -> Vec { +) -> Vec { // Shortcut to the `TranscriptDb`. let tx_db = mehari_tx_db .tx_db @@ -342,10 +368,12 @@ fn compute_tx_effects_for_breakpoint( // Convert the results into the final format. effects_by_gene .into_iter() - .map(|(hgnc_id, transcript_effects)| GeneTranscriptEffects { - hgnc_id, - transcript_effects, - }) + .map( + |(hgnc_id, transcript_effects)| StrucvarsGeneTranscriptEffects { + hgnc_id, + transcript_effects, + }, + ) .collect() } else { // We do not have any transcripts for this chromosome. 
@@ -359,7 +387,7 @@ fn compute_tx_effects_for_linear( mehari_tx_db: &TxSeqDatabase, mehari_tx_idx: &TxIntervalTrees, chrom_to_acc: &HashMap, -) -> Vec { +) -> Vec { // Shortcut to the `TranscriptDb`. let tx_db = mehari_tx_db .tx_db @@ -397,10 +425,12 @@ fn compute_tx_effects_for_linear( // Convert the results into the final format. effects_by_gene .into_iter() - .map(|(hgnc_id, transcript_effects)| GeneTranscriptEffects { - hgnc_id, - transcript_effects, - }) + .map( + |(hgnc_id, transcript_effects)| StrucvarsGeneTranscriptEffects { + hgnc_id, + transcript_effects, + }, + ) .collect() } else { // We do not have any transcripts for this chromosome. @@ -447,9 +477,9 @@ impl ConsequencePredictor { // mehari_tx_db: &TxSeqDatabase, // mehari_tx_idx: &TxIntervalTrees, // chrom_to_acc: &HashMap, - ) -> Vec { + ) -> Vec { match sv.sv_type() { - interface::StrucVarType::Ins | interface::StrucVarType::Bnd => { + interface::StrucvarsSvType::Ins | interface::StrucvarsSvType::Bnd => { compute_tx_effects_for_breakpoint( sv, &self.provider.tx_seq_db, @@ -457,9 +487,9 @@ impl ConsequencePredictor { &self.chrom_to_acc, ) } - interface::StrucVarType::Del - | interface::StrucVarType::Dup - | interface::StrucVarType::Inv => compute_tx_effects_for_linear( + interface::StrucvarsSvType::Del + | interface::StrucvarsSvType::Dup + | interface::StrucvarsSvType::Inv => compute_tx_effects_for_linear( sv, &self.provider.tx_seq_db, &self.provider.tx_trees, diff --git a/src/common/mod.rs b/src/common/mod.rs index 7188eb8e..f11b6d7d 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -46,6 +46,7 @@ pub fn trace_rss_now() { Eq, Hash, Default, + utoipa::ToSchema, )] #[serde(rename_all = "snake_case")] pub enum GenomeRelease { diff --git a/src/server/run/actix_server/gene_txs.rs b/src/server/run/actix_server/gene_txs.rs index 0d1ca01e..0fd13f81 100644 --- a/src/server/run/actix_server/gene_txs.rs +++ b/src/server/run/actix_server/gene_txs.rs @@ -1,4 +1,6 @@ -//! 
Implementation of `/seqvars/csq` endpoint. +//! Implementation of endpoint `/api/v1/genes/transcripts`. +//! +//! Also includes the implementation of the `/genes/txs` endpoint (deprecated). use crate::common::GenomeRelease; use crate::pbs; @@ -73,7 +75,7 @@ fn genes_tx_impl( }) } -/// Implementation of the `/genes/txs` endpoint. +/// Query for transcripts of a gene. #[allow(clippy::unused_async)] #[get("/genes/txs")] async fn handle( @@ -337,7 +339,7 @@ impl TryFrom for GenesTranscriptsListRespo } } -/// Query for consequence of a variant. +/// Query for transcripts of a gene. #[allow(clippy::unused_async)] #[utoipa::path( get, diff --git a/src/server/run/actix_server/mod.rs b/src/server/run/actix_server/mod.rs index 1511a144..f96169b6 100644 --- a/src/server/run/actix_server/mod.rs +++ b/src/server/run/actix_server/mod.rs @@ -62,6 +62,7 @@ pub async fn main( .service(gene_txs::handle_with_openapi) .service(seqvars_csq::handle) .service(strucvars_csq::handle) + .service(strucvars_csq::handle_with_openapi) .service(versions::handle) .service( utoipa_swagger_ui::SwaggerUi::new("/swagger-ui/{_:.*}") diff --git a/src/server/run/actix_server/strucvars_csq.rs b/src/server/run/actix_server/strucvars_csq.rs index 5eb2959d..d60858d5 100644 --- a/src/server/run/actix_server/strucvars_csq.rs +++ b/src/server/run/actix_server/strucvars_csq.rs @@ -1,3 +1,7 @@ +//! Implementation of endpoint `/api/v1/strucvars/csq`. +//! +//! Also includes the implementation of the `/strucvars/csq` endpoint (deprecated). + use actix_web::{ get, web::{self, Data, Json, Path}, @@ -5,10 +9,16 @@ use actix_web::{ }; use crate::{ - annotate::strucvars::csq::{interface, GeneTranscriptEffects}, + annotate::strucvars::csq::{ + interface::{self, StrucvarsSvType}, + StrucvarsGeneTranscriptEffects, + }, common::GenomeRelease, + server::run::actix_server::CustomError, }; +use super::versions::VersionsInfoResponse; + /// Parameters for `/strucvars/csq`. 
/// #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] @@ -41,21 +51,21 @@ impl interface::StrucVar for Query { } fn stop(&self) -> i32 { - if self.sv_type() == interface::StrucVarType::Ins { + if self.sv_type() == interface::StrucvarsSvType::Ins { self.start } else { self.stop.unwrap_or(self.start) } } - fn sv_type(&self) -> interface::StrucVarType { + fn sv_type(&self) -> interface::StrucvarsSvType { match self.sv_type.to_uppercase().as_ref() { - "DEL" => interface::StrucVarType::Del, - "DUP" => interface::StrucVarType::Dup, - "INS" => interface::StrucVarType::Ins, - "BND" => interface::StrucVarType::Bnd, - "INV" => interface::StrucVarType::Inv, - _ => interface::StrucVarType::Del, + "DEL" => interface::StrucvarsSvType::Del, + "DUP" => interface::StrucvarsSvType::Dup, + "INS" => interface::StrucvarsSvType::Ins, + "BND" => interface::StrucvarsSvType::Bnd, + "INV" => interface::StrucvarsSvType::Inv, + _ => interface::StrucvarsSvType::Del, } } @@ -72,7 +82,7 @@ struct Container { /// The original query records. pub query: Query, /// The resulting records for the scored genes. - pub result: Vec, + pub result: Vec, } /// Query for consequence of a variant. @@ -103,3 +113,104 @@ async fn handle( Ok(Json(result)) } + +/// Query parameters of the `/api/v1/strucvars/csq` endpoint. +#[derive( + Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::IntoParams, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +#[serde_with::skip_serializing_none] +pub(crate) struct StrucvarsCsqQuery { + /// The assembly. + pub genome_release: GenomeRelease, + /// Chromosome. + pub chromosome: String, + /// 1-based start position. + pub start: i32, + /// 1-based stop position, ignored for INS. + pub stop: Option, + /// The variant type to use for annotation. 
+ pub sv_type: StrucvarsSvType, +} + +impl interface::StrucVar for StrucvarsCsqQuery { + fn chrom(&self) -> String { + self.chromosome.clone() + } + + fn chrom2(&self) -> String { + self.chromosome.clone() + } + + fn start(&self) -> i32 { + self.start + } + + fn stop(&self) -> i32 { + if self.sv_type() == interface::StrucvarsSvType::Ins { + self.start + } else { + self.stop.unwrap_or(self.start) + } + } + + fn sv_type(&self) -> interface::StrucvarsSvType { + self.sv_type + } + + fn strand_orientation(&self) -> interface::StrandOrientation { + interface::StrandOrientation::NotApplicable + } +} + +/// Response of the `/api/v1/strucvars/csq` endpoint. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +pub(crate) struct StrucvarsCsqResponse { + /// Version information. + pub version: VersionsInfoResponse, + /// The original query record. + pub query: StrucvarsCsqQuery, + /// The resulting records for the affected genes. + pub result: Vec, +} + +/// Query for consequence of a variant. 
+#[allow(clippy::unused_async)] +#[utoipa::path( + get, + operation_id = "strucvarsCsq", + params( + StrucvarsCsqQuery + ), + responses( + (status = 200, description = "Strucvars consequence information.", body = StrucvarsCsqResponse), + (status = 500, description = "Internal server error.", body = CustomError) + ) +)] +#[get("/api/v1/strucvars/csq")] +async fn handle_with_openapi( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, CustomError> { + let predictor = data + .strucvars_predictors + .get(&query.genome_release) + .ok_or_else(|| { + super::CustomError::new(anyhow::anyhow!( + "genome release not supported: {:?}", + &query.genome_release + )) + })?; + + let result = predictor.compute_tx_effects(&query.clone().into_inner()); + + let result = StrucvarsCsqResponse { + version: VersionsInfoResponse::from_web_server_data(data.into_inner().as_ref()) + .map_err(|e| CustomError::new(anyhow::anyhow!("Problem determining version: {}", e)))?, + query: query.into_inner(), + result, + }; + + Ok(Json(result)) +} diff --git a/src/server/run/mod.rs b/src/server/run/mod.rs index 009360e0..d66fbbd3 100644 --- a/src/server/run/mod.rs +++ b/src/server/run/mod.rs @@ -16,22 +16,36 @@ pub mod actix_server; /// Module with OpenAPI documentation. pub mod openapi { + use crate::annotate::strucvars::csq::interface::StrucvarsSvType; + use crate::annotate::strucvars::csq::{ + StrucvarsGeneTranscriptEffects, StrucvarsTranscriptEffect, + }; + use crate::common::GenomeRelease; + use crate::server::run::actix_server::strucvars_csq::{ + StrucvarsCsqQuery, StrucvarsCsqResponse, + }; use crate::server::run::actix_server::versions::{ Assembly, DataVersionEntry, SoftwareVersions, VersionsInfoResponse, }; - use super::actix_server::{versions, CustomError}; + use super::actix_server::{strucvars_csq, versions, CustomError}; /// Utoipa-based `OpenAPI` generation helper. 
#[derive(utoipa::OpenApi)] #[openapi( - paths(versions::handle), + paths(versions::handle, strucvars_csq::handle_with_openapi), components(schemas( Assembly, CustomError, VersionsInfoResponse, SoftwareVersions, - DataVersionEntry + DataVersionEntry, + StrucvarsCsqResponse, + StrucvarsCsqQuery, + StrucvarsGeneTranscriptEffects, + StrucvarsSvType, + GenomeRelease, + StrucvarsTranscriptEffect, )) )] pub struct ApiDoc;