From 91323124dcb872719a07b4aeec309667e91b540a Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 11 Nov 2024 07:56:54 +0100 Subject: [PATCH] feat: provide genes/transcripts endpoint with openapi (#605) (#610) --- codecov.yml | 31 +++ openapi.schema.yaml | 2 +- src/server/run/actix_server/gene_txs.rs | 319 ++++++++++++++++++++++-- src/server/run/actix_server/mod.rs | 1 + src/server/run/actix_server/versions.rs | 25 +- 5 files changed, 361 insertions(+), 17 deletions(-) diff --git a/codecov.yml b/codecov.yml index 6a2971b7..72ba1080 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,2 +1,33 @@ +# For more configuration details: +# https://docs.codecov.io/docs/codecov-yaml + +# Check if this file is valid by running in bash: +# curl -X POST --data-binary @.codecov.yml https://codecov.io/validate + +# Codecov configuration +# --------------------- +codecov: + +# Coverage configuration +# ---------------------- +coverage: + status: + patch: false + + range: 70..90 # First number represents red, and second represents green + # (default is 70..100) + round: down # up, down, or nearest + precision: 0 # Number of decimal places, between 0 and 5 + +# Ignoring Paths +# -------------- +# which folders/files to ignore ignore: - "misc/*.py" + +# Pull request comments: +# ---------------------- +# Diff is the Coverage Diff of the pull request. +# Files are the files impacted by the pull request +comment: + layout: diff, files # accepted in any order: reach, diff, flags, and/or files diff --git a/openapi.schema.yaml b/openapi.schema.yaml index 6d7eea41..b833d1ec 100644 --- a/openapi.schema.yaml +++ b/openapi.schema.yaml @@ -77,7 +77,7 @@ components: description: Version of the `hgvs` crate. VersionsInfoResponse: type: object - description: Response of the `/v1/version` endpoint. + description: Response of the `/api/v1/version` endpoint. 
required: - software - data diff --git a/src/server/run/actix_server/gene_txs.rs b/src/server/run/actix_server/gene_txs.rs index bc3e12f2..0d1ca01e 100644 --- a/src/server/run/actix_server/gene_txs.rs +++ b/src/server/run/actix_server/gene_txs.rs @@ -1,39 +1,44 @@ //! Implementation of `/seqvars/csq` endpoint. use crate::common::GenomeRelease; +use crate::pbs; use crate::pbs::server::{GeneTranscriptsQuery, GeneTranscriptsResponse}; use crate::pbs::txs::GenomeBuild; +use crate::server::run::actix_server::CustomError; use actix_web::{ get, web::{self, Data, Json, Path}, }; use hgvs::data::interface::Provider as _; +use super::versions::Assembly; + /// Maximal page size. static PAGE_SIZE_MAX: i32 = 1000; /// Default page size. static PAGE_SIZE_DEFAULT: i32 = 100; -#[allow(clippy::unused_async)] -#[get("/genes/txs")] -async fn handle( +/// Core implementation of the `/genes/txs` and `/api/v1/genes/transcripts` endpoints. +/// +/// For now takes the `GeneTranscriptsQuery` as the argument and returns +/// the `GeneTranscriptsResponse` as the result. 
+fn genes_tx_impl( data: Data, - _path: Path<()>, - query: web::Query, -) -> actix_web::Result, super::CustomError> { + query: GeneTranscriptsQuery, +) -> Result { let GeneTranscriptsQuery { genome_build, hgnc_id, page_size, next_page_token, - } = query.clone().into_inner(); + } = query; let genome_build = GenomeBuild::try_from(genome_build.unwrap_or(GenomeBuild::Grch37 as i32)) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; + .map_err(|e| CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; let genome_release = GenomeRelease::try_from(genome_build) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; + .map_err(|e| CustomError::new(anyhow::anyhow!("Invalid genome build: {}", e)))?; let hgnc_id = hgnc_id .as_ref() - .ok_or_else(|| super::CustomError::new(anyhow::anyhow!("No HGNC ID provided.")))?; + .ok_or_else(|| CustomError::new(anyhow::anyhow!("No HGNC ID provided.")))?; let page_size = page_size .unwrap_or(PAGE_SIZE_DEFAULT) .min(PAGE_SIZE_MAX) @@ -42,10 +47,10 @@ async fn handle( let provider = data .provider .get(&genome_release) - .ok_or_else(|| super::CustomError::new(anyhow::anyhow!("No provider available.")))?; + .ok_or_else(|| CustomError::new(anyhow::anyhow!("No provider available.")))?; let tx_acs = provider .get_tx_for_gene(hgnc_id) - .map_err(|e| super::CustomError::new(anyhow::anyhow!("No transcripts found: {}", e)))? + .map_err(|e| CustomError::new(anyhow::anyhow!("No transcripts found: {}", e)))? .into_iter() .map(|tx| tx.tx_ac) .collect::>(); @@ -55,7 +60,7 @@ async fn handle( .unwrap_or(0); let last = (first + page_size as usize).min(tx_acs.len()); - Ok(Json(GeneTranscriptsResponse { + Ok(GeneTranscriptsResponse { transcripts: tx_acs[first..last] .iter() .filter_map(|tx_ac| provider.get_tx(tx_ac)) @@ -65,5 +70,291 @@ async fn handle( } else { None }, - })) + }) +} + +/// Implementation of the `/genes/txs` endpoint. 
+#[allow(clippy::unused_async)] +#[get("/genes/txs")] +async fn handle( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, CustomError> { + Ok(Json(genes_tx_impl(data, query.into_inner())?)) +} + +/// Query arguments for the `/api/v1/genes/transcripts` endpoint. +#[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)] +struct GenesTranscriptsListQuery { + /// HGNC gene ID. + pub hgnc_id: String, + /// Genome build. + pub genome_build: Assembly, + /// Page size. + pub page_size: Option, + /// Next page token. + pub next_page_token: Option, +} + +impl From for pbs::server::GeneTranscriptsQuery { + fn from(val: GenesTranscriptsListQuery) -> Self { + pbs::server::GeneTranscriptsQuery { + genome_build: Some(Into::::into(val.genome_build) as i32), + hgnc_id: Some(val.hgnc_id), + page_size: val.page_size, + next_page_token: val.next_page_token, + } + } +} + +/// Enumeration for `Transcript::biotype`. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum TranscriptBiotype { + /// Coding transcript. + Coding, + /// Non-coding transcript. + NonCoding, +} + +impl TryFrom for TranscriptBiotype { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::TranscriptBiotype) -> Result { + match value { + pbs::txs::TranscriptBiotype::Coding => Ok(TranscriptBiotype::Coding), + pbs::txs::TranscriptBiotype::NonCoding => Ok(TranscriptBiotype::NonCoding), + _ => Err(anyhow::anyhow!("Invalid biotype: {:?}", value)), + } + } +} + +/// Enumeration for `Transcript::tags`. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum TranscriptTag { + /// Member of Ensembl basic. + Basic, + /// Member of Ensembl canonical. + EnsemblCanonical, + /// Member of MANE Select. + ManeSelect, + /// Member of MANE Plus Clinical. + ManePlusClinical, + /// Member of RefSeq Select. + RefSeqSelect, + /// Flagged as being a selenoprotein (UGA => selenon). 
+ Selenoprotein, + /// Member of GENCODE Primary + GencodePrimary, + /// Catchall for other tags. + Other, +} + +impl TryFrom for TranscriptTag { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::TranscriptTag) -> Result { + match value { + pbs::txs::TranscriptTag::Basic => Ok(TranscriptTag::Basic), + pbs::txs::TranscriptTag::EnsemblCanonical => Ok(TranscriptTag::EnsemblCanonical), + pbs::txs::TranscriptTag::ManeSelect => Ok(TranscriptTag::ManeSelect), + pbs::txs::TranscriptTag::ManePlusClinical => Ok(TranscriptTag::ManePlusClinical), + pbs::txs::TranscriptTag::RefSeqSelect => Ok(TranscriptTag::RefSeqSelect), + pbs::txs::TranscriptTag::Selenoprotein => Ok(TranscriptTag::Selenoprotein), + pbs::txs::TranscriptTag::GencodePrimary => Ok(TranscriptTag::GencodePrimary), + pbs::txs::TranscriptTag::Other => Ok(TranscriptTag::Other), + _ => Err(anyhow::anyhow!("Invalid transcript tag: {:?}", value)), + } + } +} + +/// Enumeration for the two strands of the genome. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case")] +enum Strand { + /// unknown + Unknown, + /// Forward / plus + Plus, + /// Reverse / minus + Minus, +} + +impl From for Strand { + fn from(value: pbs::txs::Strand) -> Self { + match value { + pbs::txs::Strand::Unknown => Strand::Unknown, + pbs::txs::Strand::Plus => Strand::Plus, + pbs::txs::Strand::Minus => Strand::Minus, + } + } +} + +/// Store the alignment of one exon to the reference. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct ExonAlignment { + /// Start position on reference. + pub alt_start_i: i32, + /// End position on reference. + pub alt_end_i: i32, + /// Exon number. + pub ord: i32, + /// CDS start coordinate. + pub alt_cds_start_i: Option, + /// CDS end coordinate. + pub alt_cds_end_i: Option, + /// CIGAR string of alignment, empty indicates full matches. 
+ pub cigar: String, +} + +impl From for ExonAlignment { + fn from(value: pbs::txs::ExonAlignment) -> Self { + ExonAlignment { + alt_start_i: value.alt_start_i, + alt_end_i: value.alt_end_i, + ord: value.ord, + alt_cds_start_i: value.alt_cds_start_i, + alt_cds_end_i: value.alt_cds_end_i, + cigar: value.cigar.clone(), + } + } +} + +/// Store information about a transcript aligning to a genome. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct GenomeAlignment { + /// The genome build identifier. + pub genome_build: Assembly, + /// Accession of the contig sequence. + pub contig: String, + /// CDS start position, `-1` to indicate `None`. + pub cds_start: Option, + /// CDS end position, `-1` to indicate `None`. + pub cds_end: Option, + /// The strand. + pub strand: Strand, + /// Exons of the alignment. + pub exons: Vec, +} + +impl TryFrom for GenomeAlignment { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::GenomeAlignment) -> Result { + Ok(GenomeAlignment { + genome_build: Assembly::try_from(pbs::txs::GenomeBuild::try_from(value.genome_build)?)?, + contig: value.contig.clone(), + cds_start: value.cds_start, + cds_end: value.cds_end, + strand: Strand::from(pbs::txs::Strand::try_from(value.strand)?), + exons: value.exons.into_iter().map(Into::into).collect(), + }) + } +} + +/// Transcript information. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct Transcript { + /// Transcript accession with version, e.g., `"NM_007294.3"` or `"ENST00000461574.1"` for BRCA1. + pub id: String, + /// HGNC symbol, e.g., `"BRCA1"` + pub gene_symbol: String, + /// HGNC gene identifier, e.g., `"1100"` for BRCA1. + pub gene_id: String, + /// Transcript biotype. + pub biotype: TranscriptBiotype, + /// Transcript flags. + pub tags: Vec, + /// Identifier of the corresponding protein. + pub protein: Option, + /// CDS start codon. + pub start_codon: Option, + /// CDS stop codon. 
+ pub stop_codon: Option, + /// Alignments on the different genome builds. + pub genome_alignments: Vec, + /// Whether this transcript has an issue (e.g. MissingStopCodon), cf. `mehari::db::create::mod::Reason`. + pub filtered: Option, + /// Reason for filtering. + pub filter_reason: ::core::option::Option, +} + +impl TryFrom for Transcript { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::Transcript) -> Result { + Ok(Transcript { + id: value.id.clone(), + gene_symbol: value.gene_symbol.clone(), + gene_id: value.gene_id.clone(), + biotype: TranscriptBiotype::try_from(pbs::txs::TranscriptBiotype::try_from( + value.biotype, + )?)?, + tags: value + .tags + .into_iter() + .map(|i32_tag| -> Result<_, anyhow::Error> { + TranscriptTag::try_from(pbs::txs::TranscriptTag::try_from(i32_tag)?) + }) + .collect::, _>>()?, + protein: value.protein.clone(), + start_codon: value.start_codon, + stop_codon: value.stop_codon, + genome_alignments: value + .genome_alignments + .into_iter() + .map(TryInto::try_into) + .collect::>()?, + filtered: value.filtered, + filter_reason: value.filter_reason, + }) + } +} + +/// Response of the `/api/v1/genes/transcripts` endpoint. +#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)] +struct GenesTranscriptsListResponse { + /// The transcripts for the gene. + pub transcripts: Vec, + /// The token to continue from a previous query. + pub next_page_token: Option, +} + +impl TryFrom for GenesTranscriptsListResponse { + type Error = anyhow::Error; + + fn try_from(value: pbs::server::GeneTranscriptsResponse) -> Result { + Ok(GenesTranscriptsListResponse { + transcripts: value + .transcripts + .into_iter() + .map(TryInto::try_into) + .collect::>()?, + next_page_token: value.next_page_token, + }) + } +} + +/// Query for transcripts of a gene. 
+#[allow(clippy::unused_async)] +#[utoipa::path( + get, + operation_id = "genesTranscriptsList", + responses( + (status = 200, description = "Transcripts for the selected gene.", body = GenesTranscriptsListResponse), + (status = 500, description = "Internal server error.", body = CustomError) + ) +)] +#[get("/api/v1/genes/transcripts")] +async fn handle_with_openapi( + data: Data, + _path: Path<()>, + query: web::Query, +) -> actix_web::Result, CustomError> { + let result = genes_tx_impl(data, query.into_inner().into())?; + Ok(Json(result.try_into().map_err(|e| { + CustomError::new(anyhow::anyhow!("Conversion error: {}", e)) + })?)) } diff --git a/src/server/run/actix_server/mod.rs b/src/server/run/actix_server/mod.rs index 9885e834..1511a144 100644 --- a/src/server/run/actix_server/mod.rs +++ b/src/server/run/actix_server/mod.rs @@ -59,6 +59,7 @@ pub async fn main( actix_web::App::new() .app_data(data.clone()) .service(gene_txs::handle) + .service(gene_txs::handle_with_openapi) .service(seqvars_csq::handle) .service(strucvars_csq::handle) .service(versions::handle) diff --git a/src/server/run/actix_server/versions.rs b/src/server/run/actix_server/versions.rs index bd08138d..82c0e5e2 100644 --- a/src/server/run/actix_server/versions.rs +++ b/src/server/run/actix_server/versions.rs @@ -3,7 +3,7 @@ use actix_web::{ web::{self, Data, Json, Path}, }; -use crate::annotate::seqvars::provider::Provider; +use crate::{annotate::seqvars::provider::Provider, pbs}; use super::CustomError; @@ -41,6 +41,27 @@ impl From for Assembly { } } +impl From for pbs::txs::GenomeBuild { + fn from(val: Assembly) -> Self { + match val { + Assembly::Grch37 => pbs::txs::GenomeBuild::Grch37, + Assembly::Grch38 => pbs::txs::GenomeBuild::Grch38, + } + } +} + +impl TryFrom for Assembly { + type Error = anyhow::Error; + + fn try_from(value: pbs::txs::GenomeBuild) -> Result { + match value { + pbs::txs::GenomeBuild::Grch37 => Ok(Self::Grch37), + pbs::txs::GenomeBuild::Grch38 => Ok(Self::Grch38), + _ 
=> Err(anyhow::anyhow!("Unsupported assembly")), + } + } +} + /// Software version specification. #[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize, utoipa::ToSchema)] pub struct SoftwareVersions { @@ -87,7 +108,7 @@ impl DataVersionEntry { } } -/// Response of the `/v1/version` endpoint. +/// Response of the `/api/v1/version` endpoint. #[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize, utoipa::ToSchema)] pub struct VersionsInfoResponse { /// Software versions specification.