Skip to content

Commit

Permalink
chore: updates noodles to 0.34.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
claymcleod committed Mar 24, 2023
1 parent 7ee66b4 commit 569272b
Show file tree
Hide file tree
Showing 13 changed files with 123 additions and 85 deletions.
60 changes: 26 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ git-testament = "0.2.1"
indexmap = "1.9.1"
indicatif = "0.16.2"
itertools = "0.10.5"
noodles = { version = "0.29.0", features = [
noodles = { version = "0.34.0", features = [
"async",
"bam",
"bgzf",
Expand All @@ -40,7 +40,7 @@ regex = "1.5.5"
rust-lapper = "1.0.1"
serde = { version = "1.0.137", features = ["derive"] }
serde_json = { version = "1.0.81", features = ["preserve_order"] }
tokio = { version = "1.18.0", features = ["fs", "rt-multi-thread"] }
tokio = { version = "1.18.0", features = ["fs", "io-std", "rt-multi-thread"] }
tracing = "0.1.34"
tracing-subscriber = "0.3.11"

Expand Down
9 changes: 5 additions & 4 deletions src/convert/bam.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Conversions from a BAM file to other next-generation sequencing file formats.
use std::io;
use std::num::NonZeroUsize;
use std::path::PathBuf;

use anyhow::Context;
Expand Down Expand Up @@ -41,7 +42,7 @@ pub async fn to_sam_async(
let mut record = Record::default();

// (4) Write each record in the BAM file to the SAM file.
while reader.read_record(&mut record).await? != 0 {
while reader.read_record(&header.parsed, &mut record).await? != 0 {
writer
.write_alignment_record(&header.parsed, &record)
.await?;
Expand Down Expand Up @@ -83,8 +84,8 @@ pub async fn to_cram_async(
let name = name_as_string.parse()?;
let length = record.sequence().len();

let reference_sequence = Map::<ReferenceSequence>::new(name, length)?;
reference_sequences.insert(name_as_string, reference_sequence);
let reference_sequence = Map::<ReferenceSequence>::new(NonZeroUsize::try_from(length)?);
reference_sequences.insert(name, reference_sequence);
}

let repository = fasta::Repository::new(records);
Expand All @@ -106,7 +107,7 @@ pub async fn to_cram_async(

// (6) Write each record in the BAM file to the CRAM file.
info!("Writing records to CRAM file.");
while reader.read_record(&mut record).await? != 0 {
while reader.read_record(&header.parsed, &mut record).await? != 0 {
let cram_record = cram::Record::try_from_alignment_record(&header.parsed, &record)?;
writer
.write_record(&header.parsed, cram_record)
Expand Down
5 changes: 3 additions & 2 deletions src/convert/sam.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Conversions from a SAM file to other next-generation sequencing file formats.
use std::io;
use std::num::NonZeroUsize;
use std::path::PathBuf;

use anyhow::Context;
Expand Down Expand Up @@ -101,8 +102,8 @@ pub async fn to_cram_async(
let name = name_as_string.parse()?;
let length = record.sequence().len();

let reference_sequence = Map::<ReferenceSequence>::new(name, length)?;
reference_sequences.insert(name_as_string, reference_sequence);
let reference_sequence = Map::<ReferenceSequence>::new(NonZeroUsize::try_from(length)?);
reference_sequences.insert(name, reference_sequence);
}

let repository = fasta::Repository::new(records);
Expand Down
7 changes: 4 additions & 3 deletions src/derive/command/instrument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ async fn app(src: PathBuf, first_n_reads: Option<usize>) -> anyhow::Result<()> {
let mut instrument_names = HashSet::new();
let mut flowcell_names = HashSet::new();

let ParsedBAMFile { mut reader, .. } =
crate::utils::formats::bam::open_and_parse(src, IndexCheck::Full)?;
let ParsedBAMFile {
mut reader, header, ..
} = crate::utils::formats::bam::open_and_parse(src, IndexCheck::Full)?;

// (1) Collect instrument names and flowcell names from reads within the
// file. Support for sampling only a portion of the reads is provided.
Expand All @@ -66,7 +67,7 @@ async fn app(src: PathBuf, first_n_reads: Option<usize>) -> anyhow::Result<()> {
sample_max = s;
}

for result in reader.records() {
for result in reader.records(&header.parsed) {
let record = result?;

if let Some(read_name) = record.read_name() {
Expand Down
2 changes: 1 addition & 1 deletion src/index/bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub fn index(src: PathBuf) -> anyhow::Result<()> {
let mut counter = RecordCounter::new();

loop {
match reader.read_record(&mut record) {
match reader.read_record(&header.parsed, &mut record) {
Ok(0) => break,
Ok(_) => {}
Err(e) => bail!("failed to read record: {}", e),
Expand Down
20 changes: 17 additions & 3 deletions src/qc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use itertools::Itertools;
use noodles::sam;
use noodles::sam::header::record::value::map::ReferenceSequence;
use noodles::sam::header::record::value::Map;
use noodles::sam::record::ReferenceSequenceName;
use noodles::sam::Header;
use sam::alignment::Record;

Expand Down Expand Up @@ -200,14 +201,27 @@ pub trait SequenceBasedQualityControlFacet {
fn supports_sequence_name(&self, name: &str) -> bool;

/// Sets up a quality control facet for a given sequence.
fn setup(&mut self, sequence: &Map<ReferenceSequence>) -> anyhow::Result<()>;
fn setup(
&mut self,
name: &ReferenceSequenceName,
sequence: &Map<ReferenceSequence>,
) -> anyhow::Result<()>;

/// Processes a sequence for a quality control facet.
fn process(&mut self, seq: &Map<ReferenceSequence>, record: &Record) -> anyhow::Result<()>;
fn process(
&mut self,
name: &ReferenceSequenceName,
sequence: &Map<ReferenceSequence>,
record: &Record,
) -> anyhow::Result<()>;

/// Tears down any machinery that was built up for this sequence within the
/// quality control facet.
fn teardown(&mut self, sequence: &Map<ReferenceSequence>) -> anyhow::Result<()>;
fn teardown(
&mut self,
name: &ReferenceSequenceName,
sequence: &Map<ReferenceSequence>,
) -> anyhow::Result<()>;

/// Adds the results of this quality control facet to the global
/// [`results::Results`] object for writing to a file.
Expand Down
14 changes: 7 additions & 7 deletions src/qc/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ fn app(
if !supported_sequences
.iter()
.map(|s| s.name())
.any(|x| x == sequence)
.any(|x| x == *sequence)
{
bail!(
"Sequence \"{}\" not found in specified reference genome. \
Expand Down Expand Up @@ -302,7 +302,7 @@ fn app(
info!("Starting first pass for QC stats.");
let mut counter = RecordCounter::new();

for result in reader.records() {
for result in reader.records(&header.parsed) {
let record = result?;

for facet in &mut record_facets {
Expand Down Expand Up @@ -361,22 +361,22 @@ fn app(
debug!(" [*] Setting up sequence.");
for facet in &mut sequence_facets {
if facet.supports_sequence_name(name) {
facet.setup(seq)?;
facet.setup(name, seq)?;
}
}

let query = reader.query(
header.parsed.reference_sequences(),
&header.parsed,
&index,
&Region::new(name, start..=end),
&Region::new(name.to_string(), start..=end),
)?;

debug!(" [*] Processing records from sequence.");
for result in query {
let record = result?;
for facet in &mut sequence_facets {
if facet.supports_sequence_name(name) {
facet.process(seq, &record)?;
facet.process(name, seq, &record)?;
}
}

Expand All @@ -390,7 +390,7 @@ fn app(
debug!(" [*] Tearing down sequence.");
for facet in &mut sequence_facets {
if facet.supports_sequence_name(name) {
facet.teardown(seq)?;
facet.teardown(name, seq)?;
}
}
}
Expand Down
Loading

0 comments on commit 569272b

Please sign in to comment.