Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: update noodles group #447

Merged
merged 2 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ indexmap = { version = "2.2", features = ["serde"] }
indicatif = { version = "0.17", features = ["rayon"] }
log = "0.4"
noodles-bed = "0.12"
noodles-bgzf = "0.26"
noodles-bgzf = "0.29"
noodles-core = "0.14"
noodles-csi = "0.30"
noodles-tabix = "0.36"
noodles-vcf = "0.49"
noodles-csi = "0.33"
noodles-tabix = "0.39"
noodles-vcf = "0.55"
pbjson = "0.6"
pbjson-types = "0.6"
prost = "0.12"
Expand All @@ -58,6 +58,7 @@ tracing-subscriber = "0.3"
rustc-hash = "1.1.0"
noodles-gff = "0.27.0"
erased-serde = "0.4.2"
itertools = "0.11.0"

[build-dependencies]
prost-build = "0.12"
Expand Down
16 changes: 8 additions & 8 deletions src/common/keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,19 @@ impl Var {
}

/// Create a `Var` for the alternate allele with index `allele_no` from the given VCF record.
pub fn from_vcf_allele(value: &noodles_vcf::Record, allele_no: usize) -> Self {
let chrom = match value.chromosome() {
noodles_vcf::record::Chromosome::Name(name)
| noodles_vcf::record::Chromosome::Symbol(name) => name.to_owned(),
};
let pos: usize = value.position().into();
let pos = pos as i32;
pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, allele_no: usize) -> Self {
let chrom = value.reference_sequence_name().to_string();
let pos: usize = value
.variant_start()
.expect("Telomeric breakends not supported")
.get();
let pos = i32::try_from(pos).unwrap();
let reference = value.reference_bases().to_string();
Var {
chrom,
pos,
reference,
alternative: value.alternate_bases()[allele_no].to_string(),
alternative: value.alternate_bases().as_ref()[allele_no].to_string(),
}
}
}
Expand Down
58 changes: 38 additions & 20 deletions src/common/noodles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

use std::str::FromStr;

use noodles_vcf::record::info::field;
use noodles_vcf::variant::record_buf::info::field;

/// Extract a `String` field from a record.
pub fn get_string(record: &noodles_vcf::Record, name: &str) -> Result<String, anyhow::Error> {
if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_string(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<String, anyhow::Error> {
if let Some(Some(field::Value::String(v))) = record.info().get(name) {
Ok(v.to_string())
} else if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().as_ref().unwrap().to_string())
} else {
Expand All @@ -18,19 +21,22 @@
}

/// Extract a flag field from a record.
pub fn get_flag(record: &noodles_vcf::Record, name: &str) -> Result<bool, anyhow::Error> {
pub fn get_flag(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<bool, anyhow::Error> {
Ok(matches!(
record.info().get(&field::Key::from_str(name)?),
record.info().get(name),
Some(Some(field::Value::Flag))
))
}

/// Extract an `i32` field from a record.
pub fn get_i32(record: &noodles_vcf::Record, name: &str) -> Result<i32, anyhow::Error> {
if let Some(Some(field::Value::Integer(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_i32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result<i32, anyhow::Error> {
if let Some(Some(field::Value::Integer(v))) = record.info().get(name) {
Ok(*v)
} else if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().unwrap())
} else {
Expand All @@ -39,11 +45,11 @@
}

/// Extract an `f32` field from a record.
pub fn get_f32(record: &noodles_vcf::Record, name: &str) -> Result<f32, anyhow::Error> {
if let Some(Some(field::Value::Float(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_f32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result<f32, anyhow::Error> {
if let Some(Some(field::Value::Float(v))) = record.info().get(name) {
Ok(*v)
} else if let Some(Some(field::Value::Array(field::value::Array::Float(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().unwrap())
} else {
Expand All @@ -54,9 +60,12 @@
/// Extract a `Vec<String>` field from a record with an array field.
///
/// This is different than parsing the histograms from pipe-separated strings.
pub fn get_vec_str(record: &noodles_vcf::Record, name: &str) -> Result<Vec<String>, anyhow::Error> {
pub fn get_vec_str(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<String>, anyhow::Error> {
if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.iter().flatten().cloned().collect())
} else {
Expand All @@ -67,9 +76,12 @@
/// Extract a `Vec<i32>` field from a record with an array field.
///
/// This is different than parsing the histograms from pipe-separated strings.
pub fn get_vec_i32(record: &noodles_vcf::Record, name: &str) -> Result<Vec<i32>, anyhow::Error> {
pub fn get_vec_i32(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<i32>, anyhow::Error> {
if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.iter().flatten().cloned().collect())
} else {
Expand All @@ -78,11 +90,14 @@
}

/// Extract a `Vec<FromStr>` field from a record encoded as a pipe-symbol-separated string.
pub fn get_vec<T>(record: &noodles_vcf::Record, name: &str) -> Result<Vec<T>, anyhow::Error>
pub fn get_vec<T>(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<T>, anyhow::Error>
where
T: FromStr,
{
if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) {
if let Some(Some(field::Value::String(v))) = record.info().get(name) {
v.split('|')
.map(|s| s.parse())
.collect::<Result<Vec<_>, _>>()
Expand All @@ -94,12 +109,15 @@

/// Extract a `Vec<Vec<FromStr>>` field from a record encoded as a list of pipe-symbol-separated
/// strings.
pub fn get_vec_vec<T>(record: &noodles_vcf::Record, name: &str) -> Result<Vec<T>, anyhow::Error>
pub fn get_vec_vec<T>(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<T>, anyhow::Error>
where
T: FromStr,
{
if let Some(Some(field::Value::Array(field::value::Array::String(value)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)

Check warning on line 120 in src/common/noodles.rs

View check run for this annotation

Codecov / codecov/patch

src/common/noodles.rs#L120

Added line #L120 was not covered by tests
{
Ok(value
.iter()
Expand Down
9 changes: 5 additions & 4 deletions src/dbsnp/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use clap::Parser;
use indicatif::ParallelProgressIterator;
use noodles_csi::BinningIndex as _;
use noodles_vcf::header::record;
use noodles_vcf::variant::RecordBuf;
use prost::Message;
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};

Expand Down Expand Up @@ -105,7 +106,7 @@ fn process_window(
let cf_dbsnp = db.cf_handle(&args.cf_name).unwrap();
let cf_dbsnp_by_rsid = db.cf_handle(&args.cf_name_by_rsid).unwrap();
let mut reader =
noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
let header = reader.read_header()?;

let raw_region = format!("{}:{}-{}", chrom, begin + 1, end);
Expand All @@ -130,10 +131,10 @@ fn process_window(
// exist).
if let Some(query) = query {
for result in query {
let vcf_record = result?;
let vcf_record = RecordBuf::try_from_variant_record(&header, &result?)?;

// Process each alternate allele into one record.
for allele_no in 0..vcf_record.alternate_bases().len() {
for allele_no in 0..vcf_record.alternate_bases().as_ref().len() {
let key_buf: Vec<u8> =
common::keys::Var::from_vcf_allele(&vcf_record, allele_no).into();
let record = dbsnp::pbs::Record::from_vcf_allele(&vcf_record, allele_no)?;
Expand All @@ -158,7 +159,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
tracing::info!("Opening dbSNP VCF file...");
let before_loading = std::time::Instant::now();
let mut reader_vcf =
noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
let header = reader_vcf.read_header()?;
let dbsnp_reference = if let record::value::Collection::Unstructured(values) = header
.other_records()
Expand Down
24 changes: 13 additions & 11 deletions src/dbsnp/pbs.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
//! Data structures for (de-)serialization as generated by `prost-build`.

use std::str::FromStr;
use noodles_vcf::variant::record::AlternateBases;

pub use crate::pbs::dbsnp::Record;
use noodles_vcf::record::info::field;
use noodles_vcf::variant::record_buf::info::field;

impl Record {
/// Creates a new `Record` from a VCF record and allele number.
pub fn from_vcf_allele(
record: &noodles_vcf::record::Record,
record: &noodles_vcf::variant::RecordBuf,
allele_no: usize,
) -> Result<Self, anyhow::Error> {
let chrom = record.chromosome().to_string();
let pos: usize = record.position().into();
let pos: i32 = pos.try_into()?;
let chrom = record.reference_sequence_name().to_string();
let pos: usize = record
.variant_start()
.expect("Telomeric breakends not supported")
.get();
let pos: i32 = i32::try_from(pos)?;
let ref_allele = record.reference_bases().to_string();
let alt_allele = record
.alternate_bases()
.get(allele_no)
.ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?
.iter()
.nth(allele_no)
.ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))??
.to_string();
let rs_id = if let Some(Some(field::Value::Integer(rs))) =
record.info().get(&field::Key::from_str("RS")?)
{
let rs_id = if let Some(Some(field::Value::Integer(rs))) = record.info().get("RS") {
*rs
} else {
anyhow::bail!("no rs id in dbSNP record")
Expand Down
Loading
Loading