Skip to content

Commit

Permalink
fix: update noodles group (#447)
Browse files Browse the repository at this point in the history
* update noodles group

* update insta snapshot for test_multiquery
  • Loading branch information
tedil authored May 8, 2024
1 parent 41512ca commit 683c84d
Show file tree
Hide file tree
Showing 26 changed files with 673 additions and 1,144 deletions.
52 changes: 39 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ indexmap = { version = "2.2", features = ["serde"] }
indicatif = { version = "0.17", features = ["rayon"] }
log = "0.4"
noodles-bed = "0.12"
noodles-bgzf = "0.26"
noodles-bgzf = "0.29"
noodles-core = "0.14"
noodles-csi = "0.30"
noodles-tabix = "0.36"
noodles-vcf = "0.49"
noodles-csi = "0.33"
noodles-tabix = "0.39"
noodles-vcf = "0.55"
pbjson = "0.6"
pbjson-types = "0.6"
prost = "0.12"
Expand All @@ -58,6 +58,7 @@ tracing-subscriber = "0.3"
rustc-hash = "1.1.0"
noodles-gff = "0.27.0"
erased-serde = "0.4.2"
itertools = "0.11.0"

[build-dependencies]
prost-build = "0.12"
Expand Down
16 changes: 8 additions & 8 deletions src/common/keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,19 @@ impl Var {
}

/// Create from the alternate allele with index `allele_no` of the given VCF record.
pub fn from_vcf_allele(value: &noodles_vcf::Record, allele_no: usize) -> Self {
let chrom = match value.chromosome() {
noodles_vcf::record::Chromosome::Name(name)
| noodles_vcf::record::Chromosome::Symbol(name) => name.to_owned(),
};
let pos: usize = value.position().into();
let pos = pos as i32;
pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, allele_no: usize) -> Self {
let chrom = value.reference_sequence_name().to_string();
let pos: usize = value
.variant_start()
.expect("Telomeric breakends not supported")
.get();
let pos = i32::try_from(pos).unwrap();
let reference = value.reference_bases().to_string();
Var {
chrom,
pos,
reference,
alternative: value.alternate_bases()[allele_no].to_string(),
alternative: value.alternate_bases().as_ref()[allele_no].to_string(),
}
}
}
Expand Down
58 changes: 38 additions & 20 deletions src/common/noodles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
use std::str::FromStr;

use noodles_vcf::record::info::field;
use noodles_vcf::variant::record_buf::info::field;

/// Extract a `String` field from a record.
pub fn get_string(record: &noodles_vcf::Record, name: &str) -> Result<String, anyhow::Error> {
if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_string(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<String, anyhow::Error> {
if let Some(Some(field::Value::String(v))) = record.info().get(name) {
Ok(v.to_string())
} else if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().as_ref().unwrap().to_string())
} else {
Expand All @@ -18,19 +21,22 @@ pub fn get_string(record: &noodles_vcf::Record, name: &str) -> Result<String, an
}

/// Extract a flag field from a record.
pub fn get_flag(record: &noodles_vcf::Record, name: &str) -> Result<bool, anyhow::Error> {
pub fn get_flag(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<bool, anyhow::Error> {
Ok(matches!(
record.info().get(&field::Key::from_str(name)?),
record.info().get(name),
Some(Some(field::Value::Flag))
))
}

/// Extract an `i32` field from a record.
pub fn get_i32(record: &noodles_vcf::Record, name: &str) -> Result<i32, anyhow::Error> {
if let Some(Some(field::Value::Integer(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_i32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result<i32, anyhow::Error> {
if let Some(Some(field::Value::Integer(v))) = record.info().get(name) {
Ok(*v)
} else if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().unwrap())
} else {
Expand All @@ -39,11 +45,11 @@ pub fn get_i32(record: &noodles_vcf::Record, name: &str) -> Result<i32, anyhow::
}

/// Extract an `f32` field from a record.
pub fn get_f32(record: &noodles_vcf::Record, name: &str) -> Result<f32, anyhow::Error> {
if let Some(Some(field::Value::Float(v))) = record.info().get(&field::Key::from_str(name)?) {
pub fn get_f32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result<f32, anyhow::Error> {
if let Some(Some(field::Value::Float(v))) = record.info().get(name) {
Ok(*v)
} else if let Some(Some(field::Value::Array(field::value::Array::Float(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.first().unwrap().unwrap())
} else {
Expand All @@ -54,9 +60,12 @@ pub fn get_f32(record: &noodles_vcf::Record, name: &str) -> Result<f32, anyhow::
/// Extract a `Vec<String>` field from a record with an array field.
///
/// This is different than parsing the histograms from pipe-separated strings.
pub fn get_vec_str(record: &noodles_vcf::Record, name: &str) -> Result<Vec<String>, anyhow::Error> {
pub fn get_vec_str(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<String>, anyhow::Error> {
if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.iter().flatten().cloned().collect())
} else {
Expand All @@ -67,9 +76,12 @@ pub fn get_vec_str(record: &noodles_vcf::Record, name: &str) -> Result<Vec<Strin
/// Extract a `Vec<i32>` field from a record with an array field.
///
/// This is different than parsing the histograms from pipe-separated strings.
pub fn get_vec_i32(record: &noodles_vcf::Record, name: &str) -> Result<Vec<i32>, anyhow::Error> {
pub fn get_vec_i32(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<i32>, anyhow::Error> {
if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(vs.iter().flatten().cloned().collect())
} else {
Expand All @@ -78,11 +90,14 @@ pub fn get_vec_i32(record: &noodles_vcf::Record, name: &str) -> Result<Vec<i32>,
}

/// Extract a `Vec<FromStr>` field from a record encoded as a pipe-symbol-separated string.
pub fn get_vec<T>(record: &noodles_vcf::Record, name: &str) -> Result<Vec<T>, anyhow::Error>
pub fn get_vec<T>(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<T>, anyhow::Error>
where
T: FromStr,
{
if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) {
if let Some(Some(field::Value::String(v))) = record.info().get(name) {
v.split('|')
.map(|s| s.parse())
.collect::<Result<Vec<_>, _>>()
Expand All @@ -94,12 +109,15 @@ where

/// Extract a `Vec<Vec<FromStr>>` field from a record encoded as a list of pipe-symbol
/// separated strings.
pub fn get_vec_vec<T>(record: &noodles_vcf::Record, name: &str) -> Result<Vec<T>, anyhow::Error>
pub fn get_vec_vec<T>(
record: &noodles_vcf::variant::RecordBuf,
name: &str,
) -> Result<Vec<T>, anyhow::Error>
where
T: FromStr,
{
if let Some(Some(field::Value::Array(field::value::Array::String(value)))) =
record.info().get(&field::Key::from_str(name)?)
record.info().get(name)
{
Ok(value
.iter()
Expand Down
9 changes: 5 additions & 4 deletions src/dbsnp/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use clap::Parser;
use indicatif::ParallelProgressIterator;
use noodles_csi::BinningIndex as _;
use noodles_vcf::header::record;
use noodles_vcf::variant::RecordBuf;
use prost::Message;
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};

Expand Down Expand Up @@ -105,7 +106,7 @@ fn process_window(
let cf_dbsnp = db.cf_handle(&args.cf_name).unwrap();
let cf_dbsnp_by_rsid = db.cf_handle(&args.cf_name_by_rsid).unwrap();
let mut reader =
noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
let header = reader.read_header()?;

let raw_region = format!("{}:{}-{}", chrom, begin + 1, end);
Expand All @@ -130,10 +131,10 @@ fn process_window(
// exist).
if let Some(query) = query {
for result in query {
let vcf_record = result?;
let vcf_record = RecordBuf::try_from_variant_record(&header, &result?)?;

// Process each alternate allele into one record.
for allele_no in 0..vcf_record.alternate_bases().len() {
for allele_no in 0..vcf_record.alternate_bases().as_ref().len() {
let key_buf: Vec<u8> =
common::keys::Var::from_vcf_allele(&vcf_record, allele_no).into();
let record = dbsnp::pbs::Record::from_vcf_allele(&vcf_record, allele_no)?;
Expand All @@ -158,7 +159,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
tracing::info!("Opening dbSNP VCF file...");
let before_loading = std::time::Instant::now();
let mut reader_vcf =
noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?;
let header = reader_vcf.read_header()?;
let dbsnp_reference = if let record::value::Collection::Unstructured(values) = header
.other_records()
Expand Down
24 changes: 13 additions & 11 deletions src/dbsnp/pbs.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
//! Data structures for (de-)serialization as generated by `prost-build`.
use std::str::FromStr;
use noodles_vcf::variant::record::AlternateBases;

pub use crate::pbs::dbsnp::Record;
use noodles_vcf::record::info::field;
use noodles_vcf::variant::record_buf::info::field;

impl Record {
/// Creates a new `Record` from a VCF record and allele number.
pub fn from_vcf_allele(
record: &noodles_vcf::record::Record,
record: &noodles_vcf::variant::RecordBuf,
allele_no: usize,
) -> Result<Self, anyhow::Error> {
let chrom = record.chromosome().to_string();
let pos: usize = record.position().into();
let pos: i32 = pos.try_into()?;
let chrom = record.reference_sequence_name().to_string();
let pos: usize = record
.variant_start()
.expect("Telomeric breakends not supported")
.get();
let pos: i32 = i32::try_from(pos)?;
let ref_allele = record.reference_bases().to_string();
let alt_allele = record
.alternate_bases()
.get(allele_no)
.ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?
.iter()
.nth(allele_no)
.ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))??
.to_string();
let rs_id = if let Some(Some(field::Value::Integer(rs))) =
record.info().get(&field::Key::from_str("RS")?)
{
let rs_id = if let Some(Some(field::Value::Integer(rs))) = record.info().get("RS") {
*rs
} else {
anyhow::bail!("no rs id in dbSNP record")
Expand Down
Loading

0 comments on commit 683c84d

Please sign in to comment.