diff --git a/Cargo.toml b/Cargo.toml index 211719c..bccc23b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } thiserror = "1.0" indexmap = { version = "2", features = ["serde"] } +biocommons-bioutils = "0.1.0" [dev-dependencies] anyhow = "1.0" diff --git a/src/data/cdot/json.rs b/src/data/cdot/json.rs index 12ae1f8..995c9c6 100644 --- a/src/data/cdot/json.rs +++ b/src/data/cdot/json.rs @@ -10,8 +10,8 @@ use crate::{ self, GeneInfoRecord, TxExonsRecord, TxForRegionRecord, TxIdentityInfo, TxInfoRecord, TxMappingOptionsRecord, TxSimilarityRecord, }, - static_data::{Assembly, ASSEMBLY_INFOS}, }; +use biocommons_bioutils::assemblies::{Assembly, ASSEMBLY_INFOS}; use bio::data_structures::interval_tree::ArrayBackedIntervalTree; use chrono::NaiveDateTime; @@ -104,7 +104,7 @@ impl interface::Provider for Provider { fn get_assembly_map( &self, - assembly: crate::static_data::Assembly, + assembly: biocommons_bioutils::assemblies::Assembly, ) -> indexmap::IndexMap { self.inner.get_assembly_map(assembly) } @@ -1082,7 +1082,7 @@ pub mod tests { }; use crate::mapper::assembly::{self, Mapper}; use crate::parser::HgvsVariant; - use crate::static_data::Assembly; + use biocommons_bioutils::assemblies::Assembly; #[test] fn test_sync() { diff --git a/src/data/interface.rs b/src/data/interface.rs index 1a4dda3..f989990 100644 --- a/src/data/interface.rs +++ b/src/data/interface.rs @@ -4,7 +4,7 @@ use chrono::NaiveDateTime; use indexmap::IndexMap; use crate::data::error::Error; -use crate::static_data::Assembly; +use biocommons_bioutils::assemblies::Assembly; /// Information about a gene. /// diff --git a/src/data/uta.rs b/src/data/uta.rs index e9d7657..24822f0 100644 --- a/src/data/uta.rs +++ b/src/data/uta.rs @@ -9,7 +9,7 @@ use std::fmt::Debug; use std::sync::Mutex; use crate::sequences::seq_md5; -use crate::static_data::{Assembly, ASSEMBLY_INFOS}; +use biocommons_bioutils::assemblies::{Assembly, ASSEMBLY_INFOS}; use crate::data::{ error::Error, interface, interface::GeneInfoRecord, interface::TxExonsRecord, diff --git a/src/data/uta_sr.rs b/src/data/uta_sr.rs index f12fcea..67d9750 100644 --- a/src/data/uta_sr.rs +++ b/src/data/uta_sr.rs @@ -92,7 +92,7 @@ impl interface::Provider for Provider { fn get_assembly_map( &self, - assembly: crate::static_data::Assembly, + assembly: biocommons_bioutils::assemblies::Assembly, ) -> indexmap::IndexMap { self.inner.get_assembly_map(assembly) } diff --git a/src/lib.rs b/src/lib.rs index 99e4f0c..bfd7d74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,5 +3,4 @@ pub mod mapper; pub mod normalizer; pub mod parser; pub mod sequences; -pub mod static_data; pub mod validator; diff --git a/src/mapper/assembly.rs b/src/mapper/assembly.rs index 586b32b..4bd3c11 100644 --- a/src/mapper/assembly.rs +++ b/src/mapper/assembly.rs @@ -8,7 +8,8 @@ use std::sync::Arc; use crate::mapper::error::Error; use crate::mapper::variant; use crate::parser::HgvsVariant; -use crate::{data::interface::Provider, static_data::Assembly, validator::ValidationLevel}; +use crate::{data::interface::Provider, validator::ValidationLevel}; +use biocommons_bioutils::assemblies::Assembly; #[derive(Debug, PartialEq, Eq, Default, Clone, Copy)] pub enum InParAssume { diff --git a/src/mapper/variant.rs b/src/mapper/variant.rs index 103c8a8..565b88c 100644 --- a/src/mapper/variant.rs +++ b/src/mapper/variant.rs @@ -1152,7 +1152,7 @@ mod test { fn get_assembly_map( &self, - _assembly: crate::static_data::Assembly, + _assembly: biocommons_bioutils::assemblies::Assembly, ) -> indexmap::IndexMap { panic!("for test use only"); } diff --git a/src/static_data/_data/CHM1_1.0.json.gz b/src/static_data/_data/CHM1_1.0.json.gz deleted file mode 100644 index e6f4ed0..0000000 Binary files a/src/static_data/_data/CHM1_1.0.json.gz and /dev/null differ diff --git a/src/static_data/_data/CHM1_1.1.json.gz b/src/static_data/_data/CHM1_1.1.json.gz deleted file mode 100644 index 92a25af..0000000 Binary files a/src/static_data/_data/CHM1_1.1.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.json.gz b/src/static_data/_data/GRCh37.json.gz deleted file mode 100644 index 9eb18d4..0000000 Binary files a/src/static_data/_data/GRCh37.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p10.json.gz b/src/static_data/_data/GRCh37.p10.json.gz deleted file mode 100644 index 8ef2042..0000000 Binary files a/src/static_data/_data/GRCh37.p10.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p11.json.gz b/src/static_data/_data/GRCh37.p11.json.gz deleted file mode 100644 index ba26e9c..0000000 Binary files a/src/static_data/_data/GRCh37.p11.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p12.json.gz b/src/static_data/_data/GRCh37.p12.json.gz deleted file mode 100644 index 1fd0d20..0000000 Binary files a/src/static_data/_data/GRCh37.p12.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p13.json.gz b/src/static_data/_data/GRCh37.p13.json.gz deleted file mode 100644 index af8f168..0000000 Binary files a/src/static_data/_data/GRCh37.p13.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p2.json.gz b/src/static_data/_data/GRCh37.p2.json.gz deleted file mode 100644 index 81f92d6..0000000 Binary files a/src/static_data/_data/GRCh37.p2.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p5.json.gz b/src/static_data/_data/GRCh37.p5.json.gz deleted file mode 100644 index a7a067c..0000000 Binary files a/src/static_data/_data/GRCh37.p5.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh37.p9.json.gz b/src/static_data/_data/GRCh37.p9.json.gz deleted file mode 100644 index 1663d85..0000000 Binary files a/src/static_data/_data/GRCh37.p9.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.json.gz b/src/static_data/_data/GRCh38.json.gz deleted file mode 100644 index 97b7490..0000000 Binary files a/src/static_data/_data/GRCh38.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p1.json.gz b/src/static_data/_data/GRCh38.p1.json.gz deleted file mode 100644 index d8bb447..0000000 Binary files a/src/static_data/_data/GRCh38.p1.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p10.json.gz b/src/static_data/_data/GRCh38.p10.json.gz deleted file mode 100644 index 2348e8a..0000000 Binary files a/src/static_data/_data/GRCh38.p10.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p11.json.gz b/src/static_data/_data/GRCh38.p11.json.gz deleted file mode 100644 index 4bfe9aa..0000000 Binary files a/src/static_data/_data/GRCh38.p11.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p12.json.gz b/src/static_data/_data/GRCh38.p12.json.gz deleted file mode 100644 index e51de38..0000000 Binary files a/src/static_data/_data/GRCh38.p12.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p2.json.gz b/src/static_data/_data/GRCh38.p2.json.gz deleted file mode 100644 index c721f0d..0000000 Binary files a/src/static_data/_data/GRCh38.p2.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p3.json.gz b/src/static_data/_data/GRCh38.p3.json.gz deleted file mode 100644 index 65aba13..0000000 Binary files a/src/static_data/_data/GRCh38.p3.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p4.json.gz b/src/static_data/_data/GRCh38.p4.json.gz deleted file mode 100644 index cbf9a7e..0000000 Binary files a/src/static_data/_data/GRCh38.p4.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p5.json.gz b/src/static_data/_data/GRCh38.p5.json.gz deleted file mode 100644 index 2c303e4..0000000 Binary files a/src/static_data/_data/GRCh38.p5.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p6.json.gz b/src/static_data/_data/GRCh38.p6.json.gz deleted file mode 100644 index e9aa419..0000000 Binary files a/src/static_data/_data/GRCh38.p6.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p7.json.gz b/src/static_data/_data/GRCh38.p7.json.gz deleted file mode 100644 index 5227187..0000000 Binary files a/src/static_data/_data/GRCh38.p7.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p8.json.gz b/src/static_data/_data/GRCh38.p8.json.gz deleted file mode 100644 index 158b1c2..0000000 Binary files a/src/static_data/_data/GRCh38.p8.json.gz and /dev/null differ diff --git a/src/static_data/_data/GRCh38.p9.json.gz b/src/static_data/_data/GRCh38.p9.json.gz deleted file mode 100644 index 25d11e8..0000000 Binary files a/src/static_data/_data/GRCh38.p9.json.gz and /dev/null differ diff --git a/src/static_data/_data/Makefile b/src/static_data/_data/Makefile deleted file mode 100644 index 850ebcf..0000000 --- a/src/static_data/_data/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -ASSEMBLIES := \ - CHM1_1.0 \ - CHM1_1.1 \ - GRCh37 \ - GRCh37.p10 \ - GRCh37.p11 \ - GRCh37.p12 \ - GRCh37.p13 \ - GRCh37.p2 \ - GRCh37.p5 \ - GRCh37.p9 \ - GRCh38 \ - GRCh38.p10 \ - GRCh38.p11 \ - GRCh38.p12 \ - GRCh38.p1 \ - GRCh38.p2 \ - GRCh38.p3 \ - GRCh38.p4 \ - GRCh38.p5 \ - GRCh38.p6 \ - GRCh38.p7 \ - GRCh38.p8 \ - GRCh38.p9 \ - NCBI33 \ - NCBI34 \ - NCBI35 \ - NCBI36 \ - -JSONS := $(addsuffix .json.gz,${ASSEMBLIES}) - -.PHONY: -all: $(JSONS) - -%.json.gz: - wget -O $@ https://github.com/biocommons/bioutils/raw/main/src/bioutils/_data/assemblies/$@ diff --git a/src/static_data/_data/NCBI33.json.gz b/src/static_data/_data/NCBI33.json.gz deleted file mode 100644 index 7c0a3c4..0000000 Binary files a/src/static_data/_data/NCBI33.json.gz and /dev/null differ diff --git a/src/static_data/_data/NCBI34.json.gz b/src/static_data/_data/NCBI34.json.gz deleted file mode 100644 index 237a3c1..0000000 Binary files a/src/static_data/_data/NCBI34.json.gz and /dev/null differ diff --git a/src/static_data/_data/NCBI35.json.gz b/src/static_data/_data/NCBI35.json.gz deleted file mode 100644 index 6d741d7..0000000 Binary files a/src/static_data/_data/NCBI35.json.gz and /dev/null differ diff --git a/src/static_data/_data/NCBI36.json.gz b/src/static_data/_data/NCBI36.json.gz deleted file mode 100644 index 56c7365..0000000 Binary files a/src/static_data/_data/NCBI36.json.gz and /dev/null differ diff --git a/src/static_data/mod.rs b/src/static_data/mod.rs deleted file mode 100644 index 11b5282..0000000 --- a/src/static_data/mod.rs +++ /dev/null @@ -1,98 +0,0 @@ -//! Static data. - -use std::io::Read; - -use enum_map::{enum_map, Enum, EnumMap}; -use flate2::read::GzDecoder; -use serde::Deserialize; - -const GRCH37_JSON_GZ: &[u8] = include_bytes!("_data/GRCh37.json.gz"); -const GRCH37_P10_JSON_GZ: &[u8] = include_bytes!("_data/GRCh37.p10.json.gz"); -const GRCH38_JSON_GZ: &[u8] = include_bytes!("_data/GRCh38.json.gz"); - -#[derive(Debug, Deserialize, Enum, Clone, Copy, PartialEq, Eq, Hash)] -pub enum Assembly { - Grch37, - Grch37p10, - Grch38, -} - -impl Assembly { - /// Deserialize assembly info from embedded compressed JSON. - fn load_assembly_info(&self) -> AssemblyInfo { - let payload = match self { - Assembly::Grch37 => GRCH37_JSON_GZ, - Assembly::Grch37p10 => GRCH37_P10_JSON_GZ, - Assembly::Grch38 => GRCH38_JSON_GZ, - }; - let mut d = GzDecoder::new(payload); - let mut grch37_json = String::new(); - d.read_to_string(&mut grch37_json) - .expect("should not happen; invalid gzip in embedded data"); - serde_json::from_str::(&grch37_json) - .expect("should not happen; invalid JSON in embedded data") - } -} - -#[derive(Debug, Deserialize)] -pub struct Sequence { - pub aliases: Vec, - pub assembly_unit: String, - pub genbank_ac: String, - pub length: usize, - pub name: String, - pub refseq_ac: String, - pub relationship: String, - pub sequence_role: String, -} - -#[derive(Debug, Deserialize)] -pub struct AssemblyInfo { - pub date: String, - pub description: String, - pub genbank_ac: String, - pub name: String, - pub refseq_ac: String, - pub sequences: Vec, - pub submitter: String, -} - -lazy_static::lazy_static! { - /// Provide information about the assemblies. - pub static ref ASSEMBLY_INFOS: EnumMap = enum_map! { - Assembly::Grch37 => Assembly::Grch37.load_assembly_info(), - Assembly::Grch37p10 => Assembly::Grch37p10.load_assembly_info(), - Assembly::Grch38 => Assembly::Grch38.load_assembly_info(), - }; -} - -#[cfg(test)] -mod test { - use pretty_assertions::assert_eq; - - use crate::static_data::{Assembly, ASSEMBLY_INFOS}; - - #[test] - fn smoke() { - assert_eq!(ASSEMBLY_INFOS[Assembly::Grch37].sequences.len(), 92); - assert_eq!(ASSEMBLY_INFOS[Assembly::Grch37p10].sequences.len(), 275); - assert_eq!(ASSEMBLY_INFOS[Assembly::Grch38].sequences.len(), 455); - } -} - -// -// Copyright 2023 hgvs-rs Contributors -// Copyright 2014 Bioutils Contributors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//