diff --git a/protos/mehari/txs.proto b/protos/mehari/txs.proto index 424d73d3..dd0358eb 100644 --- a/protos/mehari/txs.proto +++ b/protos/mehari/txs.proto @@ -17,30 +17,38 @@ message SequenceDb { repeated string seqs = 3; } +// Indicates the reference assembly of the transcript database. enum Assembly { + // Unknown. ASSEMBLY_UNKNOWN = 0; + // GRCh37. ASSEMBLY_GRCH37 = 1; + // GRCh38. ASSEMBLY_GRCH38 = 2; } +// Indicates the transcript source. enum Source { + // Unknown. SOURCE_UNKNOWN = 0; + // RefSeq. SOURCE_REFSEQ = 1; + // Ensembl. SOURCE_ENSEMBL = 2; } message SourceVersion { - // Version of mehari used to build the database + // Version of mehari used to build the database. string mehari_version = 1; - // Assembly used, either GRCh37 or GRCh38 (or Unknown) + // Assembly used, either GRCh37 or GRCh38 (or Unknown). Assembly assembly = 2; - // Version of the assembly, optional + // Version of the assembly, optional. optional string assembly_version = 3; - // Source, either RefSeq or Ensembl (or Unknown) - Source source = 4; - // Version of the source, e.g. 112 for Ensembl + // Source, either RefSeq or Ensembl (or Unknown). + Source source_name = 4; + // Version of the source, e.g. 112 for Ensembl. string source_version = 5; - // Version of cdot + // Version of cdot. string cdot_version = 6; } @@ -182,8 +190,6 @@ message TxSeqDatabase { SequenceDb seq_db = 2; // The version of the database. optional string version = 3; -// // The reference assembly that this database refers to. Removed in favour of source_version -// optional string genome_release = 4; // Version information; allow repeated here to be able to keep track of information when merging databases repeated SourceVersion source_version = 5; } diff --git a/src/db/create/mod.rs b/src/db/create/mod.rs index 9b860bef..a86241cf 100644 --- a/src/db/create/mod.rs +++ b/src/db/create/mod.rs @@ -58,6 +58,7 @@ pub struct Args { pub transcript_source_version: Option, /// Version of cdot data. + #[arg(long)] pub cdot_version: String, /// Path to output protobuf file to write to. @@ -91,10 +92,14 @@ pub struct Args { pub threads: usize, } +/// Source of the transcripts. #[derive(Debug, Clone, Copy, ValueEnum)] pub enum TxSource { + /// RefSeq. RefSeq, + /// Ensembl. Ensembl, + /// Other. Other, } @@ -1759,7 +1764,7 @@ pub fn run(common: &crate::common::Args, args: &Args) -> Result<(), Error> { let assembly_version = args.assembly_version.clone(); - let source = match args.transcript_source { + let source_name = match args.transcript_source { TxSource::RefSeq => Source::Refseq, TxSource::Ensembl => Source::Ensembl, TxSource::Other => Source::Unknown, @@ -1772,7 +1777,7 @@ pub fn run(common: &crate::common::Args, args: &Args) -> Result<(), Error> { mehari_version: crate::common::version().to_string(), assembly: i32::from(assembly), assembly_version, - source: i32::from(source), + source_name: i32::from(source_name), source_version, cdot_version, } diff --git a/src/db/merge.rs b/src/db/merge.rs index b85d399b..3915e44e 100644 --- a/src/db/merge.rs +++ b/src/db/merge.rs @@ -31,8 +31,7 @@ pub fn merge_transcript_databases( assert!(first.source_version.iter().map(|v| v.assembly).all_equal()); let assembly = first .source_version - .iter() - .next() + .first() .expect("At least one source_version entry expected") .assembly; assert!(others