From 79e577c85f013e972023d06ddded46abe21ef280 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Thu, 26 Oct 2023 08:15:10 -0400 Subject: [PATCH] Add more timing tools --- Cargo.toml | 2 +- examples/async_mzcat.rs | 67 ++++++++++++++++++++++++++++++ examples/mzcat.rs | 91 +++++++++++++++++++++++++++++++++++++++++ src/io/mzml/async.rs | 1 + src/io/mzmlb/reader.rs | 4 +- 5 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 examples/async_mzcat.rs create mode 100644 examples/mzcat.rs diff --git a/Cargo.toml b/Cargo.toml index 9ef017f..2ac0f67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ lto = true debug = true [features] -default = ["intel-mkl", "parallelism", "plotting", "mzmlb", "zlib-ng-compat"] +default = ["intel-mkl", "parallelism", "plotting", "mzmlb", "zlib-ng-compat", ] openblas = ["mzsignal/openblas"] netlib = ["mzsignal/netlib"] diff --git a/examples/async_mzcat.rs b/examples/async_mzcat.rs new file mode 100644 index 0000000..ad3457c --- /dev/null +++ b/examples/async_mzcat.rs @@ -0,0 +1,67 @@ +use std::{io, path, env}; +use std::time; + +#[cfg(not(feature = "async"))] +compile_error!("This example requires the async feature"); + +use tokio; +use tokio::fs; + +use mzdata::io::mzml; +use mzdata::io::prelude::*; + +async fn load_file + Clone>(path: P) -> io::Result> { + let fh = fs::File::open(path.into()).await?; + let mut reader = mzml::AsyncMzMLReader::new(fh).await; + reader.read_index_from_end().await.expect("Failed to read index from the file"); + Ok(reader) +} + + +async fn scan_file( + reader: &mut mzml::AsyncMzMLReader, +) { + let start = time::Instant::now(); + let n = reader.len(); + let mut i = 0; + while let Some(scan) = reader.get_spectrum_by_index(i).await { + if i % 10000 == 0 { + println!( + "\tScan {}: {}|{} ({} seconds)", + i, + scan.id(), + scan.index(), + (time::Instant::now() - start).as_secs_f64(), + ); + } + i += 1; + if i == n { + break; + } + } + let end = time::Instant::now(); + println!("Loaded in {} 
spectra {} seconds", i, (end - start).as_secs_f64()); +} + + +#[tokio::main(flavor = "multi_thread", worker_threads = 10)] +async fn main() -> io::Result<()> { + let path = path::PathBuf::from( + env::args() + .skip(1) + .next() + .expect("Please pass an MS data file path"), + ); + if let Some(ext) = path.extension() { + if ext.to_string_lossy().to_lowercase() == "mzml" { + let mut reader = load_file(path).await?; + scan_file(&mut reader).await; + } else { + panic!("Could not infer the file format") + } + } else { + let mut reader = load_file(path).await?; + scan_file(&mut reader).await; + }; + Ok(()) +} \ No newline at end of file diff --git a/examples/mzcat.rs b/examples/mzcat.rs new file mode 100644 index 0000000..9f8a86c --- /dev/null +++ b/examples/mzcat.rs @@ -0,0 +1,91 @@ +use std::{io, path, env, fs}; +use std::time; + +use rayon::prelude::*; + +use mzdata::io::{mzml, mzmlb}; +use mzdata::io::prelude::*; +use mzdata::spectrum::MultiLayerSpectrum; +use mzpeaks::{CentroidPeak, DeconvolutedPeak}; + + +fn load_file + Clone>(path: P) -> io::Result> { + let reader = mzml::MzMLReader::open_path(path)?; + Ok(reader) +} + +#[cfg(feature = "mzmlb")] +fn load_mzmlb_file + Clone>(path: P) -> io::Result { + let reader = mzmlb::MzMLbReader::open_path(&path.into())?; + let blosc_threads = match std::env::var("BLOSC_NUM_THREADS") { + Ok(val) => { + match val.parse() { + Ok(nt) => nt, + Err(e) => { + eprintln!("Failed to parse BLOSC_NUM_THREADS env var: {}", e); + 4 + }, + } + }, + Err(_) => 4, + }; + mzmlb::MzMLbReader::set_blosc_nthreads(blosc_threads); + Ok(reader) +} + +fn scan_file< + R: MZFileReader< + CentroidPeak, + DeconvolutedPeak, + MultiLayerSpectrum + > + Iterator> + Send, +>( + reader: &mut R, +) { + let start = time::Instant::now(); + reader.enumerate().par_bridge().for_each(|(i, scan)| { + if i % 10000 == 0 { + println!( + "\tScan {}: {}|{} ({} seconds)", + i, + scan.id(), + scan.index(), + (time::Instant::now() - start).as_secs_f64(), + ); + } + }); + let 
end = time::Instant::now(); + println!("Loaded in {} seconds", (end - start).as_secs_f64()); +} + + +fn main() -> io::Result<()> { + let path = path::PathBuf::from( + env::args() + .skip(1) + .next() + .expect("Please pass an MS data file path"), + ); + if let Some(ext) = path.extension() { + if ext.to_string_lossy().to_lowercase() == "mzmlb" { + #[cfg(feature = "mzmlb")] + { + let mut reader = load_mzmlb_file(path)?; + scan_file(&mut reader) + } + #[cfg(not(feature = "mzmlb"))] + { + panic!("Cannot read mzMLb file. Recompile enabling the `mzmlb` feature") + } + } else if ext.to_string_lossy().to_lowercase() == "mzml" { + let mut reader = load_file(path)?; + scan_file(&mut reader) + } else { + panic!("Could not infer the file format") + } + } else { + let mut reader = load_file(path)?; + scan_file(&mut reader) + }; + Ok(()) +} \ No newline at end of file diff --git a/src/io/mzml/async.rs b/src/io/mzml/async.rs index cc22f94..970dfe7 100644 --- a/src/io/mzml/async.rs +++ b/src/io/mzml/async.rs @@ -4,6 +4,7 @@ use std::marker::PhantomData; use std::mem; use std::pin::Pin; +use super::MzMLSAX; use super::reader::{ MzMLParserError, MzMLParserState, SpectrumBuilding, Bytes, FileMetadataBuilder, MzMLSpectrumBuilder, diff --git a/src/io/mzmlb/reader.rs b/src/io/mzmlb/reader.rs index 4acdc68..9be25b4 100644 --- a/src/io/mzmlb/reader.rs +++ b/src/io/mzmlb/reader.rs @@ -483,11 +483,11 @@ impl Seek for ByteReader { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { match pos { io::SeekFrom::Start(offset) => { - self.position = (self.position + offset as usize).min(0); + self.position = (offset as usize).max(0); } io::SeekFrom::End(offset) => { let mut n = self.handle.size(); - self.position = (n + offset as usize).min(0); + self.position = (n as i64 + offset).max(0) as usize; /* End-relative seek: `offset` is a signed i64 (normally <= 0), so add it to the size in signed arithmetic and clamp at 0. The earlier `(n + offset as usize).max(n)` cast a negative offset to a wrapped usize and then forced the result to be >= n, so it could never land before the end. */ } io::SeekFrom::Current(offset) => { self.position = (self.position + offset as usize).max(0);