Skip to content

Commit

Permalink
Add combineSpectra,MSnExp method (issue #474)
Browse files Browse the repository at this point in the history
- Add the combineSpectra,MSnExp method.
- Add related documentation and unit tests.
- Remove warnings in meanMzInts and consensusSpectrum to avoid confusions for
  general input data (such as MS2 spectra from DIA data).
  • Loading branch information
jorainer committed Jul 2, 2019
1 parent 9e2d703 commit eb39f8a
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 38 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ Collate:
'AllClassUnions.R'
'AllGenerics.R'
'DataClasses.R'
'NAnnotatedDataFrame.R'
'MzTab.R'
'NAnnotatedDataFrame.R'
'NTR.R'
'RcppExports.R'
'TMT10.R'
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
- Remove message about changed meaning of the "modifications" argument in
calculateFragments' that was introduced in MSnbase 1.17.6 (2015-06-21).
<2019-06-01 Sat>
- Implement combineSpectra,MSnExp (see
[#474](https://github.com/lgatto/MSnbase/issues/474)) <2019-06-02 Tue>.

## MSnbase 2.11.2

Expand Down
24 changes: 17 additions & 7 deletions R/functions-Spectrum.R
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,8 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33,
meanMzInts <- function(x, ..., intensityFun = base::mean, weighted = FALSE,
main = 1L, mzd, ppm = 0, timeDomain = FALSE,
unionPeaks = TRUE) {
if (length(x) == 1)
return(x[[1]])
if (length(unique(unlist(lapply(x, function(z) z@msLevel)))) != 1)
stop("Can only combine spectra with the same MS level")
if (main > length(x) || main < 1)
Expand All @@ -979,10 +981,13 @@ meanMzInts <- function(x, ..., intensityFun = base::mean, weighted = FALSE,
mz_groups <- .group_mz_values(sqrt(mzs), mzd = mzd)
else
mz_groups <- .group_mz_values(mzs, mzd = mzd, ppm = ppm)
if (length(unique(mz_groups)) < length(x[[main]]@mz))
warning("Got less m/z groups than m/z values in the original spectrum.",
" Most likely the data is not profile-mode LCMS data or ",
"'mzd' is too large.")
## Disable the warning below, as this does not apply to all use cases: for
## arbitrary input data we can not expect the first spectrum to contain the
## largest number of peaks.
## if (length(unique(mz_groups)) < length(x[[main]]@mz))
## warning("Got less m/z groups than m/z values in the original spectrum.",
## " Most likely the data is not profile-mode LCMS data or ",
## "'mzd' is too large.")
ints <- unlist(base::lapply(x, function(z) z@intensity),
use.names = FALSE)[mz_order]
new_sp <- x[[main]]
Expand Down Expand Up @@ -1205,6 +1210,8 @@ meanMzInts <- function(x, ..., intensityFun = base::mean, weighted = FALSE,
#'
consensusSpectrum <- function(x, mzd, minProp = 0.5, intensityFun = base::max,
ppm = 0, ...) {
if (length(x) == 1)
return(x[[1]])
if (!is(x, "Spectra"))
x <- Spectra(x)
if (length(unique(msLevel(x))) != 1)
Expand All @@ -1220,9 +1227,12 @@ consensusSpectrum <- function(x, mzd, minProp = 0.5, intensityFun = base::max,
mzs <- mzs[keep]
ints <- ints[keep]
mz_groups <- .group_mz_values(mzs, mzd = mzd, ppm = ppm)
if (length(unique(mz_groups)) < sum(xnew@intensity > 0))
warning("Got less m/z groups than m/z groups in the first spectrum. ",
"Most likely `mzd` is too large.")
## Disable the warning below, as this does not apply to all use cases: for
## arbitrary input data we can not expect the first spectrum to contain the
## largest number of peaks.
## if (length(unique(mz_groups)) < sum(xnew@intensity > 0))
## warning("Got less m/z groups than m/z groups in the first spectrum. ",
## "Most likely `mzd` is too large.")
mzs <- split(mzs, mz_groups)
ints <- split(ints, mz_groups)
keep <- lengths(mzs) >= (length(x) * minProp)
Expand Down
47 changes: 47 additions & 0 deletions R/methods-MSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -577,3 +577,50 @@ setAs("MSnExp", "Spectra", function(from) {
fdta <- fdta[, !colnames(fdta) %in% red_cn, drop = FALSE]
Spectra(spectra(from), elementMetadata = DataFrame(fdta))
})

#' @rdname combineSpectra
setMethod("combineSpectra", "MSnExp", function(object, fcol = "fileIdx",
method = meanMzInts, ...,
BPPARAM = bpparam()) {
BPPARAM <- getBpParam(object, BPPARAM = BPPARAM)
fns <- fileNames(object)
if (is(object, "MSnExp") && fcol == "fileIdx")
fData(object)$fileIdx <- fromFile(object)
objs <- split(object, fromFile(object))
dots <- list(...)
res <- bplapply(objs, function(z, fcol, fns, dots) {
sps <- do.call(
combineSpectra,
args = c(list(
object = Spectra(spectra(z), elementMetadata = DataFrame(fData(z))),
fcol = fcol, method = method), dots))
ff <- match(fileNames(z), fns)
sps@listData <- lapply(sps@listData, function(x) {
x@fromFile <- ff
x
})
mcols(sps)$fileIdx <- ff
sps
}, fcol = fcol, fns = fns, dots = dots, BPPARAM = BPPARAM)
names(res) <- NULL
fdta <- do.call(rbind, lapply(res, function(z) z@elementMetadata))
res <- unlist(lapply(res, function(z) z@listData))
rownames(fdta) <- names(res)
msn <- new("MSnExp")
msn@featureData <- AnnotatedDataFrame(as.data.frame(fdta))
msn@assayData <- list2env(res)
lockEnvironment(msn@assayData, bindings = TRUE)
msn@phenoData <- object@phenoData
msn@experimentData <- object@experimentData
msn@protocolData <- object@protocolData
msn@processingData <- object@processingData
msn@processingData@processing <- c(
msn@processingData@processing,
paste0("Spectra combined based on feature variable '", fcol, "' [",
date(), "]"))
.cacheEnv <- setCacheEnv(list("assaydata" = msn@assayData, "hd" = NULL),
level = 0, lock = TRUE)
msn@.cache = .cacheEnv
validObject(msn)
msn
})
63 changes: 50 additions & 13 deletions R/methods-Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ setMethod("show", "Spectra", function(object) {
#'
#' - `smooth` *smooths* spectra. See [smooth()] for more details.
#'
#' @section Filtering and subsetting
#' @section Filtering and subsetting:
#'
#' - `[` can be used to subset the `Spectra` object.
#'
Expand Down Expand Up @@ -419,27 +419,32 @@ setMethod("smooth", "Spectra", function(x, method = c("SavitzkyGolay",
x
})




#' `combineSpectra` combines spectra in a [MSnExp-class] or [Spectra-class]
#' object applying the summarization function `fun` to sets of spectra defined
#' by a factor (`fcol` parameter). The resulting combined spectrum for each set
#' contains metadata information (present in `mcols` and all spectrum
#' information other than `mz` and `intensity`) from the first spectrum in each
#' set.
#' @title Combine Spectra
#'
#' @description
#'
#' `combineSpectra` combines spectra in a [MSnExp-class], [OnDiskMSnExp-class]
#' or [Spectra-class] object applying the summarization function `fun` to sets
#' of spectra defined by a factor (`fcol` parameter). The resulting combined
#' spectrum for each set contains metadata information (present in `mcols` and
#' all spectrum information other than `mz` and `intensity`) from the first
#' spectrum in each set.
#'
#' Combining of spectra for [MSnExp-class] or [OnDiskMSnExp-class] objects is
#' performed by default for each file **separately**, combining of spectra
#' across files is thus not possible. See examples for details.
#'
#' @aliases combineSpectra
#'
#' @rdname combineSpectra
#'
#' @title Combine Spectra
#'
#' @param object A [MSnExp-class] or [Spectra-class]
#'
#' @param fcol For `Spectra` objects: `mcols` column name to be used to define
#' the sets of spectra to be combined. If missing, all spectra are
#' considered to be one set.
#' considered to be one set. For `MSnExp`/`OnDiskMSnExp` objects: column
#' in `fData(object)` defining which spectra to combine. See examples below
#' for more details.
#'
#' @param fun *Deprecated* use `method` instead.
#'
Expand All @@ -449,6 +454,10 @@ setMethod("smooth", "Spectra", function(x, method = c("SavitzkyGolay",
#'
#' @param ... additional arguments for `fun`.
#'
#' @param BPPARAM For `MSnExp`/`OnDiskMSnExp` objects: parallel processing setup
#' to perform per-file parallel spectra combining. See [bpparam()] for more
#' details.
#'
#' @return A `Spectra` or `MSnExp` object with combined spectra. Metadata
#' (`mcols`) and all spectrum attributes other than `mz` and `intensity`
#' are taken from the first `Spectrum` in each set.
Expand Down Expand Up @@ -511,6 +520,34 @@ setMethod("smooth", "Spectra", function(x, method = c("SavitzkyGolay",
#' col = "black")
#' plot(mz(res[[2]]), intensity(res[[2]]), xlim = range(mzs[5:25]), type = "h",
#' col = "black")
#'
#' ## Combining spectra of an MSnExp/OnDiskMSnExp objects
#' ## Reading data from 2 mzML files
#' sciex <- readMSData(dir(system.file("sciex", package = "msdata"),
#' full.names = TRUE), mode = "onDisk")
#'
#' ## Filter the file to a retention time range from 2 to 20 seconds (to reduce
#' ## execution time of the example)
#' sciex <- filterRt(sciex, rt = c(2, 20))
#' table(fromFile(sciex))
#'
#' ## We have thus 64 spectra per file.
#'
#' ## In the example below we combine spectra measured in one second to a
#' ## single spectrum. We thus first define the grouping variable and add that
#' ## to the `fData` of the object. For combining, we use the
#' ## `consensusSpectrum` function that combines the spectra keeping only peaks
#' ## that were found in 50% of the spectra; by defining `mzd = 0.01` all peaks
#' ## within an m/z of 0.01 are evaluated for combining.
#' seconds <- round(rtime(sciex))
#' head(seconds)
#' fData(sciex)$second <- seconds
#'
#' res <- combineSpectra(sciex, fcol = "second", mzd = 0.01, minProp = 0.1,
#' method = consensusSpectrum)
#' table(fromFile(res))
#'
#' ## The data was reduced to 19 spectra for each file.
setMethod("combineSpectra", "Spectra", function(object, fcol,
method = meanMzInts, fun, ...) {
if (!missing(fun)) {
Expand Down
3 changes: 2 additions & 1 deletion R/readMSData2.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ readMSData2 <- function(files,
verbose = isMSnbaseVerbose(),
centroided.,
smoothed. = NA) {
.Defunct(new = "readMSData")
.Defunct(new = "readMSData")
}


Expand Down Expand Up @@ -66,6 +66,7 @@ readOnDiskMSData <- function(files, pdata, msLevel., verbose,
nrow(fdData)))
}
## Order the fdData by acquisitionNum to force use of acquisitionNum

## as unique ID for the spectrum (issue #103). That way we can use
## the spIdx (is the index of the spectrum within the file) for
## subsetting and extracting.
Expand Down
11 changes: 11 additions & 0 deletions man/Spectra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 53 additions & 11 deletions man/combineSpectra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 17 additions & 2 deletions tests/testthat/test_MSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,8 @@ test_that("combineSpectraMovingWindow works", {
## Use pre-calculated mzd:
mzd <- estimateMzScattering(od)
## If mzd is estimated on mz and combination on sqrt(mz) it will fail.
expect_warning(od_comb <- combineSpectraMovingWindow(
od, mzd = mzd[[4]], timeDomain = TRUE))
od_comb <- combineSpectraMovingWindow(
od, mzd = mzd[[4]], timeDomain = TRUE)
## All on m/z scale
od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]],
timeDomain = FALSE)
Expand Down Expand Up @@ -600,3 +600,18 @@ test_that("isolationWindowLowerMz, isolationWindowUpperMz work", {
expect_error(isolationWindowLowerMz(tmt_im_ms2_sub), "not available")
expect_error(isolationWindowUpperMz(tmt_im_ms2_sub), "not available")
})

test_that("combineSpectra,MSnExp works", {
res <- combineSpectra(tmt_im_ms2_sub)
expect_true(is(res, "MSnExp"))
expect_true(length(res) == 1)
expect_equal(res[[1]],
combineSpectra(as(tmt_im_ms2_sub, "Spectra"))@listData[[1]])
res2 <- combineSpectra(tmt_im_ms2_sub, mzd = 0.1)
expect_true(peaksCount(res2) < peaksCount(res))
res3 <- combineSpectra(tmt_im_ms2_sub, mzd = 0.01, minProp = 0.1,
method = consensusSpectrum)
expect_true(peaksCount(res3) < peaksCount(res2))

expect_error(combineSpectra(tmt_im_ms2_sub, fcol = "other"))
})
6 changes: 6 additions & 0 deletions tests/testthat/test_OnDiskMSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,9 @@ test_that("isolationWindowLowerMz,isolationWindowUpperMz,OnDiskMSnExp works", {
expect_true(all(is.na(mz_low)))
expect_true(all(is.na(mz_high)))
})

test_that("combineSpectra,MSnExp works with OnDiskMSnExp", {
res <- combineSpectra(filterRt(sciex, c(10, 20)))
expect_true(is(res, "MSnExp"))
expect_true(length(res) == 2)
})
Loading

0 comments on commit eb39f8a

Please sign in to comment.