Skip to content

Commit

Permalink
Add support for mzMLb
Browse files Browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Jan 13, 2024
1 parent 5e616d2 commit d112479
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 56 deletions.
4 changes: 3 additions & 1 deletion crates/sage-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ path = "src/main.rs"

[features]

default = []

mzmlb = ["sage-cloudpath/mzdata"]


[dependencies]
sage-core = { path = "../sage" }
sage-cloudpath = { path = "../sage-cloudpath", features = ["parquet"] }
sage-cloudpath = { path = "../sage-cloudpath", features = ["parquet", "mzdata"] }

anyhow = "1.0"
csv = "1"
Expand Down
12 changes: 11 additions & 1 deletion crates/sage-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,22 @@ impl Runner {

let path_lower = path.to_lowercase();
let res = if path_lower.ends_with(".mgf.gz") || path_lower.ends_with(".mgf") {
sage_cloudpath::util::read_mgf(path_lower, file_id)
sage_cloudpath::util::read_mgf(path, file_id)
} else if bruker_extensions
.iter()
.any(|ext| path_lower.ends_with(ext))
{
sage_cloudpath::util::read_tdf(path, file_id)
} else if path_lower.ends_with(".mzmlb") {
#[cfg(feature = "mzmlb")]
{
sage_cloudpath::util::read_mzmlb(path, file_id, sn)
}
#[cfg(not(feature = "mzmlb"))]
{
// Fall back to prior behavior
sage_cloudpath::util::read_mzml(path, file_id, sn)
}
} else {
sage_cloudpath::util::read_mzml(path, file_id, sn)
};
Expand Down
3 changes: 3 additions & 0 deletions crates/sage-cloudpath/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = []

[dependencies]
thiserror = "1.0"
async-compression = { version = "0.3", features = ["tokio", "gzip", "zlib"] }
Expand Down
4 changes: 2 additions & 2 deletions crates/sage-cloudpath/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use tokio::io::{AsyncBufRead, AsyncRead, AsyncWriteExt, BufReader};

pub mod mgf;
pub mod mzml;
pub mod tdf;
pub mod util;
#[cfg(feature = "mzdata")]
pub mod mzmlb;
pub mod tdf;
pub mod util;

#[cfg(feature = "parquet")]
pub mod parquet;
Expand Down
123 changes: 78 additions & 45 deletions crates/sage-cloudpath/src/mzmlb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
use std::io;
use std::path::Path;

use mzdata::io::MzMLbReader as MzMLbReaderImpl;
use mzdata::prelude::*;
use mzdata::RawSpectrum as RawSpectrumImpl;
use mzdata::io::prelude::*;
use mzdata::io::{MzMLbReader as MzMLbReaderImpl};
use sage_core::mass::Tolerance;
use sage_core::spectrum::{RawSpectrum, Precursor};

use sage_core::spectrum::{Precursor, RawSpectrum};

pub struct MzMLbReader {
ms_level: Option<u8>,
Expand All @@ -20,7 +19,7 @@ pub struct MzMLbReader {
}

impl MzMLbReader {
/// Create a new [`MzMlReader`] with a minimum MS level filter
/// Create a new [`MzMLbReader`] with a minimum MS level filter
///
/// # Example
///
Expand Down Expand Up @@ -51,48 +50,82 @@ impl MzMLbReader {
self
}

pub fn parse<B>(&self, b: B) -> Result<Vec<RawSpectrum>, io::Error> where B: AsRef<Path> {
pub fn parse<B>(&self, b: B) -> Result<Vec<RawSpectrum>, io::Error>
where
B: AsRef<Path>,
{
let reader = MzMLbReaderImpl::new(&b)?;

let spectra = reader.into_iter().filter(|scan| {
if let Some(ms_level) = self.ms_level {
return scan.ms_level() == ms_level
} else {
return true
}
}).map(|scan| {
let scan: RawSpectrumImpl = scan.into();
let mut precusors = Vec::new();
match scan.precursor() {
Some(p) => {
let p = Precursor {
mz: p.mz() as f32,
intensity: Some(p.ion.intensity),
charge: p.ion.charge.and_then(|v| { Some(v as u8) }),
spectrum_ref: p.precursor_id.clone(),
isolation_window: Some(Tolerance::Da(p.isolation_window.lower_bound as f32, p.isolation_window.upper_bound as f32))
};
precusors.push(p)
},
None => {}
}
RawSpectrum {
file_id: self.file_id,
ms_level: scan.ms_level(),
id: scan.description.id.clone(),
precursors: precusors,
ion_injection_time: scan.acquisition().first_scan().unwrap().injection_time,
representation: match scan.description.signal_continuity {
mzdata::spectrum::SignalContinuity::Unknown => sage_core::spectrum::Representation::Profile,
mzdata::spectrum::SignalContinuity::Centroid => sage_core::spectrum::Representation::Centroid,
mzdata::spectrum::SignalContinuity::Profile => sage_core::spectrum::Representation::Profile,
},
scan_start_time: scan.start_time() as f32,
total_ion_current: scan.peaks().tic(),
mz: scan.mzs().iter().map(|mz| { (*mz) as f32 }).collect(),
intensity: scan.intensities().to_vec(),
}
}).collect();
let spectra = reader
.into_iter()
.filter(|scan| {
if let Some(ms_level) = self.ms_level {
return scan.ms_level() == ms_level;
} else {
return true;
}
})
.map(|scan| {
let scan: RawSpectrumImpl = scan.into();
let mut precusors = Vec::new();
match scan.precursor() {
Some(p) => {
let p = Precursor {
mz: p.mz() as f32,
intensity: Some(p.ion.intensity),
charge: p.ion.charge.and_then(|v| Some(v as u8)),
spectrum_ref: p.precursor_id.clone(),
isolation_window: Some(Tolerance::Da(
p.isolation_window.lower_bound as f32,
p.isolation_window.upper_bound as f32,
)),
};
precusors.push(p)
}
None => {}
}
RawSpectrum {
file_id: self.file_id,
ms_level: scan.ms_level(),
id: scan.description.id.clone(),
precursors: precusors,
ion_injection_time: scan.acquisition().first_scan().unwrap().injection_time,
representation: match scan.description.signal_continuity {
mzdata::spectrum::SignalContinuity::Unknown => {
sage_core::spectrum::Representation::Profile
}
mzdata::spectrum::SignalContinuity::Centroid => {
sage_core::spectrum::Representation::Centroid
}
mzdata::spectrum::SignalContinuity::Profile => {
sage_core::spectrum::Representation::Profile
}
},
scan_start_time: scan.start_time() as f32,
total_ion_current: scan.peaks().tic(),
mz: scan.mzs().iter().map(|mz| (*mz) as f32).collect(),
intensity: scan.intensities().to_vec(),
}
})
.collect();
Ok(spectra)
}
}


#[cfg(test)]
mod test {
    use super::*;

    /// Parses the mzMLb fixture under `tests/` and checks that exactly one
    /// spectrum is read, with the expected native id and 299 m/z and
    /// intensity values.
    #[test]
    fn test_read_mzmlb() -> io::Result<()> {
        let reader = MzMLbReader::with_file_id(0);
        let parsed = reader.parse("../../tests/LQSRPAAPPAPGPGQLTLR.mzMLb")?;
        assert_eq!(parsed.len(), 1);

        let spectrum = parsed.first().unwrap();
        assert_eq!(
            spectrum.id,
            "controllerType=0 controllerNumber=1 scan=30069"
        );
        assert_eq!(spectrum.mz.len(), 299);
        assert_eq!(spectrum.intensity.len(), 299);
        Ok(())
    }
}
16 changes: 9 additions & 7 deletions crates/sage-cloudpath/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,21 @@ pub fn read_tdf<S: AsRef<str>>(s: S, file_id: usize) -> Result<Vec<RawSpectrum>,
}
}


/// Read the spectra from the mzMLb file at path `s`, constructing the reader
/// with `file_id`.
///
/// `signal_to_noise` is forwarded unchanged to the reader's
/// `set_signal_to_noise` builder method before parsing.
///
/// # Errors
///
/// An I/O error returned by the reader is wrapped in [`Error::IO`].
#[cfg(feature = "mzdata")]
pub fn read_mzmlb<S: AsRef<str>>(
    s: S,
    file_id: usize,
    signal_to_noise: Option<u8>,
) -> Result<Vec<RawSpectrum>, Error> {
    crate::mzmlb::MzMLbReader::with_file_id(file_id)
        .set_signal_to_noise(signal_to_noise)
        .parse(s.as_ref())
        .map_err(Error::IO)
}


pub fn read_mgf<S: AsRef<str>>(path: S, file_id: usize) -> Result<Vec<RawSpectrum>, Error> {
read_and_execute(path, |mut bf| async move {
let mut contents = String::new();
Expand Down
Binary file added tests/LQSRPAAPPAPGPGQLTLR.mzMLb
Binary file not shown.

0 comments on commit d112479

Please sign in to comment.