Skip to content

Commit

Permalink
Merge pull request #253 from lgatto/writeMSData
Browse files Browse the repository at this point in the history
Add write,MSnExp and write,OnDiskMSnExp
  • Loading branch information
LaurentGatto authored Oct 17, 2017
2 parents b823f47 + c043544 commit 690c1af
Show file tree
Hide file tree
Showing 18 changed files with 764 additions and 21 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ Depends:
methods,
BiocGenerics (>= 0.7.1),
Biobase (>= 2.15.2),
mzR (>= 2.7.6),
mzR (>= 2.11.11),
BiocParallel,
ProtGenerics (>= 1.5.1)
ProtGenerics (>= 1.9.1)
Imports:
plyr,
IRanges,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,8 @@ exportMethods(updateObject,
"$<-",
chromatogram,
"colnames<-",
reduce)
reduce,
writeMSData)

## methods NOT exported
## curveStats
Expand Down
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Changes in version 2.3.13
-------------------------
- Add write support for MSnExp and OnDiskMSnExp objects allowing to save the MS
data to mzML or mzXML files. <2017-09-15 Fri>

Changes in version 2.3.12
-------------------------
- Keep `protocolData` in isobaric quantification; fixes #265
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# MSnbase 2.3

## Changes in version 2.3.13
- Nothing yet.
- Add write support for MSnExp and OnDiskMSnExp objects allowing to save the MS
data to mzML or mzXML files. <2017-09-15 Fri>

## Changes in version 2.3.12
- Keep `protocolData` in isobaric quantification; fixes #265
Expand Down
1 change: 0 additions & 1 deletion R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,3 @@ setGeneric("splitByFile", function(object, f, ...) standardGeneric("splitByFile"
## setGeneric("productMz", function(object, value) standardGeneric("productMz"))
## setGeneric("aggregationFun", function(object, ...)
## standardGeneric("aggregationFun"))

6 changes: 3 additions & 3 deletions R/functions-OnDiskMSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ precursorValue_OnDiskMSnExp <- function(object, column) {
## Return an empty Chromatogram if there is no spectrum/scan
## within the retention time range.
if (!any(in_rt)) {
cur_res[[i]] <- Chromatogram(
cur_res[[i]] <- MSnbase::Chromatogram(
filterMz = mzm[i, ],
fromFile = as.integer(cur_file),
aggregationFun = aggFun)
Expand Down Expand Up @@ -627,7 +627,7 @@ precursorValue_OnDiskMSnExp <- function(object, column) {
if (!all(is.na(ints)))
mz_range <- range(allVals[-c(int_idx, mslevel_idx)],
na.rm = TRUE, finite = TRUE)
cur_res[[i]] <- Chromatogram(
cur_res[[i]] <- MSnbase::Chromatogram(
rtime = rts[in_rt],
intensity = ints,
mz = mz_range,
Expand Down Expand Up @@ -656,7 +656,7 @@ precursorValue_OnDiskMSnExp <- function(object, column) {
for (i in empties) {
empty_list <- vector(mode = "list", length = nrow(rt))
for(j in 1:nrow(rt)) {
empty_list[[j]] <- Chromatogram(filterMz = mz[j, ],
empty_list[[j]] <- MSnbase::Chromatogram(filterMz = mz[j, ],
fromFile = as.integer(i),
aggregationFun = aggregationFun)
}
Expand Down
74 changes: 74 additions & 0 deletions R/functions-Spectrum.R
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,77 @@ validSpectrum <- function(object) {
if (is.null(msg)) TRUE
else stop(msg)
}

#' Extract the header information of a spectrum
#'
#' @description `.spectrum_header` gathers the header values of a single
#'     `Spectrum` object into one named vector.
#'
#' @details Values that are not taken from the `Spectrum` object are reported
#'     as `0`:
#'     - ionisationEnergy
#'     - mergedResultScanNum
#'     - mergedResultStartScanNum
#'     - mergedResultEndScanNum
#'     - injectionTime
#'
#'     `collisionEnergy`, `precursorScanNum`, `precursorMZ`,
#'     `precursorCharge`, `precursorIntensity` and `mergedScan` are only
#'     read from `x` if its MS level is larger than 1; otherwise they are
#'     `0` too.
#'
#' @param x `Spectrum` object.
#'
#' @return A named `numeric` with the following fields (in this order):
#'     - acquisitionNum
#'     - msLevel
#'     - polarity
#'     - peaksCount
#'     - totIonCurrent
#'     - retentionTime
#'     - basePeakMZ
#'     - basePeakIntensity
#'     - collisionEnergy
#'     - ionisationEnergy
#'     - lowMZ
#'     - highMZ
#'     - precursorScanNum
#'     - precursorMZ
#'     - precursorCharge
#'     - precursorIntensity
#'     - mergedScan
#'     - mergedResultScanNum
#'     - mergedResultStartScanNum
#'     - mergedResultEndScanNum
#'     - injectionTime
#'
#' @author Johannes Rainer
#'
#' @md
#'
#' @noRd
.spectrum_header <- function(x) {
    ## Fetch the peak data once; base peak and m/z range are derived from it.
    mzs <- mz(x)
    ints <- intensity(x)
    hdr <- c(acquisitionNum = acquisitionNum(x),
             msLevel = msLevel(x),
             polarity = polarity(x),
             peaksCount = peaksCount(x),
             totIonCurrent = tic(x),
             retentionTime = rtime(x),
             ## m/z of the most intense peak; `[1]` yields NA if there is none.
             basePeakMZ = mzs[which.max(ints)][1],
             basePeakIntensity = max(ints),
             collisionEnergy = 0,
             ionisationEnergy = 0,          # not taken from the object
             lowMZ = min(mzs),
             highMZ = max(mzs),
             precursorScanNum = 0,
             precursorMZ = 0,
             precursorCharge = 0,
             precursorIntensity = 0,
             mergedScan = 0,
             mergedResultScanNum = 0,       # not taken from the object
             mergedResultStartScanNum = 0,  # not taken from the object
             mergedResultEndScanNum = 0,    # not taken from the object
             injectionTime = 0              # not taken from the object
             )
    ## Spectra with MS level <= 1 keep the 0 placeholders for the
    ## precursor-related fields.
    if (msLevel(x) <= 1)
        return(hdr)
    hdr["collisionEnergy"] <- collisionEnergy(x)
    hdr["precursorScanNum"] <- precScanNum(x)
    hdr["precursorMZ"] <- precursorMz(x)
    hdr["precursorCharge"] <- precursorCharge(x)
    hdr["precursorIntensity"] <- precursorIntensity(x)
    hdr["mergedScan"] <- x@merged
    hdr
}

3 changes: 1 addition & 2 deletions R/methods-pSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,7 @@ setMethod("spectrapply", "pSet", function(object, FUN = NULL,
BPPARAM <- getBpParam(object, BPPARAM = BPPARAM)
if (is.null(FUN))
return(spectra(object))
vals <- bplapply(spectra(object), FUN = FUN, BPPARAM = BPPARAM, ...)
return(vals)
bplapply(spectra(object), FUN = FUN, BPPARAM = BPPARAM, ...)
})

setMethod("$", "pSet", function(x, name) {
Expand Down
80 changes: 80 additions & 0 deletions R/methods-write.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' @title Write MS data to mzML or mzXML files
#'
#' @aliases writeMSData
#'
#' @description The `writeMSData,MSnExp` and `writeMSData,OnDiskMSnExp` methods
#' save the content of a [MSnExp] or [OnDiskMSnExp] object to MS file(s) in
#' either *mzML* or *mzXML* format.
#'
#' @details The `writeMSData` method uses the *proteowizard* libraries through
#' the `mzR` package to save the MS data. The data can be written to
#' *mzML* or *mzXML* files with or without copying additional metadata
#' information from the original files from which the data was read by the
#' [readMSData()] function. This can be set using the `copy` parameter.
#' Note that `copy = TRUE` requires the original files to be available and
#' is not supported for input files in formats other than mzML or mzXML.
#' All metadata related to the run is copied, such as instrument
#' information, data processings etc. If `copy = FALSE` only processing
#' information performed in R (using `MSnbase`) are saved to the mzML file.
#'
#' Currently only spectrum data is supported, i.e. if the original mzML
#' file contains also chromatogram data it is not copied/saved to the new
#' mzML file.
#'
#' @note General spectrum data such as total ion current, peak count, base peak
#' m/z or base peak intensity are calculated from the actual spectrum data
#' before writing the data to the files.
#'
#' For MSn data, if the `OnDiskMSnExp` or `MSnExp` does not contain also
#' the precursor scan of a MS level > 1 spectrum (e.g. due to filtering on
#' the MS level) `precursorScanNum` is set to 0 in the output file to
#' avoid potentially linking to a wrong spectrum.
#'
#' @param object `OnDiskMSnExp` or `MSnExp` object.
#'
#' @param file `character` with the file name(s). Its length has to match the
#' number of samples/files of `object`.
#'
#' @param outformat `character(1)` defining the format of the output files.
#' Default output format is `"mzml"`.
#'
#' @param merge `logical(1)` whether the data should be saved into a single
#' *mzML* file. Default is `merge = FALSE`, i.e. each sample is saved to
#' a separate file. **Note**: `merge = TRUE` is not yet implemented.
#'
#' @param verbose `logical(1)` if progress messages should be displayed.
#'
#' @param copy `logical(1)` if metadata (data processings, original file names
#' etc) should be copied from the original files. See details for more
#' information.
#'
#' @param software_processing optionally provide specific data processing steps.
#' See documentation of the `software_processing` parameter of
#' [mzR::writeMSData()].
#'
#' @author Johannes Rainer
#'
#' @md
#'
#' @rdname writeMSData
setMethod("writeMSData", signature(object = "MSnExp", file = "character"),
          function(object, file, outformat = c("mzml", "mzxml"),
                   merge = FALSE, verbose = isMSnbaseVerbose(), copy = FALSE,
                   software_processing = NULL) {
              ## Writes the MS data of `object` to `file` via `.writeMSData`.
              ##
              ## Metadata can only be copied if every original file still
              ## exists; otherwise fall back to 'copy = FALSE' with a warning.
              ## `copy` is a scalar flag, hence the short-circuiting `&&`:
              ## the file system is only queried if copying was requested.
              if (copy && !all(file.exists(fileNames(object)))) {
                  warning("Setting 'copy = FALSE' because the original files ",
                          "can not be found.")
                  copy <- FALSE
              }
              if (merge) {
                  ## Writing all samples into a single file is not supported.
                  stop("Not yet implemented.")
                  ## Set copy to FALSE
                  ## Call .writeMSDataMerged
              } else {
                  ## NOTE(review): `software_processing` is passed
                  ## positionally after named arguments - confirm that it
                  ## lands on the intended formal of `.writeMSData`.
                  .writeMSData(object = object, file = file,
                               outformat = outformat, verbose = verbose,
                               copy = copy, software_processing)
              }
          })

6 changes: 5 additions & 1 deletion R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,9 @@ setMSnbaseFastLoad <- function(opt = TRUE) {

##' @rdname MSnbaseOptions
isMSnbaseFastLoad <- function() {
## Returns the `fastLoad` element of `MSnbaseOptions()`, or `FALSE` if that
## element has length zero (e.g. is `NULL`).
## NOTE(review): the bare expression below does not affect the result; it
## looks like the pre-change line of this diff hunk - confirm.
MSnbaseOptions()$fastLoad
fast_load <- MSnbaseOptions()$fastLoad
## For some odd reasons we get also NULL back - parallel processing?
## A zero-length value is replaced by FALSE so that a value is always returned.
if (!length(fast_load))
fast_load <- FALSE
fast_load
}
13 changes: 4 additions & 9 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,8 @@ setMethod("isEmpty", "environment",
isOnDisk <- function(object)
any(grepl("spectraProcessingQueue", slotNames(object)))

## Simple function to determine whether parallel or serial processing should be performed
## Simple function to determine whether parallel or serial processing should be
## performed.
## Check testthat/test_OnDiskMSnExp_benchmarks.R for performance comparisons.
## Parameter object is expected to be a pSet (e.g. MSnExp or OnDiskMSnExp) - TODO confirm.
getBpParam <- function(object, BPPARAM=bpparam()) {
Expand All @@ -1075,14 +1076,8 @@ getBpParam <- function(object, BPPARAM=bpparam()) {
## If it's empty, return SerialParam
if (length(object) == 0)
return(SerialParam())
## if (is(object, "OnDiskMSnExp")) {
## ## Return SerialParam if we access less than PARALLEL_THRESH spectra per file.
## if (mean(table(fData(object)$fileIdx)) < parallel_thresh)
## return(SerialParam())
## } else {
if (length(object) < parallel_thresh)
return(SerialParam())
## }
if (length(object) < parallel_thresh)
return(SerialParam())
return(BPPARAM)
}

Expand Down
Loading

0 comments on commit 690c1af

Please sign in to comment.