diff --git a/NAMESPACE b/NAMESPACE index 7c00b9e47..3b53a4ccd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,8 +33,9 @@ importFrom(ggplot2, geom_histogram, geom_text, geom_line, geom_point, facet_grid) importFrom(MALDIquant, binPeaks, match.closest) ## only using ::: functions -importFrom(S4Vectors, isEmpty, metadata, DataFrame, replaceROWS, SimpleList) -importMethodsFrom(S4Vectors, showAsCell, "mcols<-", mcols, split, endoapply) +importFrom(S4Vectors, isEmpty, metadata, DataFrame, replaceROWS, SimpleList, + endoapply) +importMethodsFrom(S4Vectors, showAsCell, "mcols<-", mcols, split) importClassesFrom(S4Vectors, SimpleList) importFrom(MASS, rlm) @@ -146,7 +147,8 @@ export(MSnSet, requiredFvarLabels, addMSnSetMetadata, readSRMData, - combineSpectra, + meanMzInts, + consensusSpectrum, combineSpectraMovingWindow, estimateMzScattering, hasSpectra, hasChromatograms, @@ -307,7 +309,8 @@ exportMethods(updateObject, reduce, writeMSData, productMz, - estimateMzResolution + estimateMzResolution, + combineSpectra ) ## methods NOT exported diff --git a/NEWS b/NEWS index a42658818..bebe9eb07 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ Changes in version 2.7.10 - Methods for Spectra class <2018-10-15 Mon> +- Change default for `timeDomain` in `combineSpectra` and + `combineSpectraMovingWindow` to `FALSE` <2018-10-18 Thu> +- Add new spectra combination function `consensusSpectrum` <2018-10-24 Wed> Changes in version 2.7.9 - Import rather than depend on BiocParallel <2018-10-15 Mon> diff --git a/NEWS.md b/NEWS.md index ced4eb724..24261504a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,9 @@ ## Changes in version 2.7.10 - Methods for Spectra class <2018-10-15 Mon> +- Change default for `timeDomain` in `combineSpectra` and + `combineSpectraMovingWindow` to `FALSE` <2018-10-18 Thu> +- Add new spectra combination function `consensusSpectrum` <2018-10-24 Wed> ## Changes in version 2.7.9 - Import rather than depend on BiocParallel <2018-10-15 Mon> diff --git a/R/AllGenerics.R b/R/AllGenerics.R index e560fbf9a..553b757b2 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -151,3 +151,6 @@ setGeneric("productMz", function(object, ...) standardGeneric("productMz")) ## centroiding related setGeneric("estimateMzResolution", function(object, ...) standardGeneric("estimateMzResolution")) + +setGeneric("combineSpectra", function(object, ...) + standardGeneric("combineSpectra")) diff --git a/R/functions-MSnExp.R b/R/functions-MSnExp.R index eba528ea5..7442bdb13 100644 --- a/R/functions-MSnExp.R +++ b/R/functions-MSnExp.R @@ -448,7 +448,7 @@ estimateMzScattering <- function(x, halfWindowSize = 1L, timeDomain = FALSE) { #' signal to noise ratio. #' #' Intensities (and m/z values) for signals with the same m/z value in -#' consecutive scans are aggregated using the `intensityFun` and `mzFun`. +#' consecutive scans are aggregated using the `intensityFun`. #' m/z values of intensities from consecutive scans will never be exactly #' identical, even if they represent signal from the same ion. The function #' determines thus internally a similarity threshold based on differences @@ -457,7 +457,7 @@ estimateMzScattering <- function(x, halfWindowSize = 1L, timeDomain = FALSE) { #' threshold is estimated on the 100 spectra with the largest number of #' m/z - intensity pairs (i.e. mass peaks). #' -#' See [combineSpectra()] for details. +#' See [meanMzInts()] for details. #' #' Parameter `timeDomain`: by default, m/z-intensity pairs from consecutive #' scans to be aggregated are defined based on the square root of the m/z @@ -485,7 +485,7 @@ estimateMzScattering <- function(x, halfWindowSize = 1L, timeDomain = FALSE) { #' #' @param BPPARAM parallel processing settings. #' -#' @inheritParams combineSpectra +#' @inheritParams meanMzInts #' #' @return `MSnExp` with the same number of spectra than `x`. #' @@ -493,7 +493,7 @@ estimateMzScattering <- function(x, halfWindowSize = 1L, timeDomain = FALSE) { #' #' @seealso #' -#' [combineSpectra()] for the function combining spectra provided in +#' [meanMzInts()] for the function combining spectra provided in #' a `list`. #' #' [estimateMzScattering()] for a function to estimate m/z value scattering in @@ -537,10 +537,10 @@ estimateMzScattering <- function(x, halfWindowSize = 1L, timeDomain = FALSE) { #' plot(chr_comb) #' ## Chromatographic data is "smoother" after combining. combineSpectraMovingWindow <- function(x, halfWindowSize = 1L, - mzFun = base::mean, intensityFun = base::mean, mzd = NULL, - timeDomain = TRUE, + timeDomain = FALSE, + weighted = FALSE, BPPARAM = bpparam()){ if (!is(x, "MSnExp")) stop("'x' has to be a 'MSnExp' or an 'OnDiskMSnExp'") @@ -548,7 +548,7 @@ combineSpectraMovingWindow <- function(x, halfWindowSize = 1L, x <- as(x, "MSnExp") ## Combine spectra per file new_sp <- bplapply(split(spectra(x), fromFile(x)), FUN = function(z, intF, - mzF, hws, + wght, hws, mzd, timeD) { len_z <- length(z) @@ -566,13 +566,14 @@ combineSpectraMovingWindow <- function(x, halfWindowSize = 1L, res <- vector("list", len_z) hwsp <- hws + 1L for (i in seq_along(z)) { - res[[i]] <- combineSpectra(z[windowIndices(i, hws, len_z)], - mzFun = mzF, intensityFun = intF, + res[[i]] <- meanMzInts(z[windowIndices(i, hws, len_z)], + weighted = wght, intensityFun = intF, main = hwsp - (i <= hws) * (hwsp - i), - mzd = mzd, timeDomain = timeD) + mzd = mzd, timeDomain = timeD, + unionPeaks = FALSE) } res - }, intF = intensityFun, mzF = mzFun, hws = as.integer(halfWindowSize), + }, intF = intensityFun, wght = weighted, hws = as.integer(halfWindowSize), mzd = mzd, timeD = timeDomain, BPPARAM = BPPARAM) new_sp <- unsplit(new_sp, fromFile(x)) names(new_sp) <- featureNames(x) diff --git a/R/functions-Spectrum.R b/R/functions-Spectrum.R index 687ca3006..cdb7cbe5d 100644 --- a/R/functions-Spectrum.R +++ b/R/functions-Spectrum.R @@ -800,16 +800,33 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33, res } -#' @title Combine profile-mode spectra signals +#' @title Combine a list of spectra to a single spectrum #' #' @description +#' +#' Combine peaks from several spectra into a single spectrum. Intensity and +#' m/z values from the input spectra are aggregated into a single peak if +#' the difference between their m/z values is smaller than `mzd`.Intensity +#' values are aggregated with the `intensityFun`, m/z values by the mean, or +#' intensity weighted mean if `weighted = TRUE`. +#' +#' @note #' -#' Combine signals from profile-mode spectra of consecutive scans into the -#' values for the *main* spectrum. This can improve centroiding of -#' profile-mode data by increasing the signal-to-noise ratio. +#' This allows e.g. to combine profile-mode spectra of consecutive scans into +#' the values for the *main* spectrum. This can improve centroiding of +#' profile-mode data by increasing the signal-to-noise ratio and is used in the +#' [combineSpectraMovingWindow()] function. #' #' @details #' +#' For general merging of spectra, the `mzd` should be manually specified based +#' on the precision of the MS instrument. Peaks from spectra with a difference +#' in their m/z being smaller than `mzd` are grouped into the same final peak +#' with their intensities being aggregated with the `intensityFun` function. +#' +#' +#' Some details for the combination of consecutive spectra of an LCMS run: +#' #' The m/z values of the same ion in consecutive scans (spectra) of a LCMS run #' will not be identical. Assuming that this random variation is much smaller #' than the resolution of the MS instrument (i.e. the difference between @@ -834,22 +851,22 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33, #' @param x `list` of `Spectrum` objects. #' #' @param main `integer(1)` defining the *main* spectrum, i.e. the spectrum -#' which m/z and intensity values get replaced and is returned. -#' -#' @param mzFun `function` to aggregate the m/z values per m/z group. Should be -#' a function or the name of a function. The function is expected to -#' return a `numeric(1)`. For `mzFun = "weighted.mean"` (note -#' that the *name* of the function is passed!) the resulting m/z is -#' determined as an intensity-weighted mean of spectras' m/z values. +#' which m/z and intensity values get replaced and is returned. By default +#' the *first* spectrum in `x` is used. #' +#' @param weighted `logical(1)` whether m/z values per m/z group should be +#' aggregated with an intensity-weighted mean. The default is to report +#' the mean m/z. +#' #' @param intensityFun `function` to aggregate the intensity values per m/z #' group. Should be a function or the name of a function. The function is #' expected to return a `numeric(1)`. #' #' @param mzd `numeric(1)` defining the maximal m/z difference below which -#' values are grouped. If not provided, this value is estimated from the -#' distribution of differences of m/z values from the provided spectra -#' (see details). +#' mass peaks are considered to represent the same ion/mass peak. +#' Intensity values for such grouped mass peaks are aggregated. If not +#' specified this value is estimated from the distribution of differences +#' of m/z values from the provided spectra (see details). #' #' @param timeDomain `logical(1)` whether definition of the m/z values to be #' combined into one m/z is performed on m/z values @@ -858,16 +875,27 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33, #' on the time domain (see details). Note that a pre-defined `mzd` should #' also be estimated on the square root of m/z values if #' `timeDomain = TRUE`. +#' +#' @param unionPeaks `logical(1)` whether the union of all peaks (peak groups) +#' from all spectra are reported or only peak groups that contain peaks +#' that are present in the *main* spectrum (defined by `main`). The default +#' is to report the union of peaks from all spectra. +#' +#' @param ... additional parameters that are passed to `intensityFun`. #' #' @return #' #' `Spectrum` with m/z and intensity values representing the aggregated values -#' across the provided spectra. The returned spectrum contains the same number -#' of m/z and intensity pairs than the spectrum with index `main` in `x`, also -#' all other related information is taken from this spectrum. +#' across the provided spectra. The returned spectrum contains the union of +#' all peaks from all spectra (if `unionPeaks = TRUE`), or the same number of +#' m/z and intensity pairs than the spectrum with index `main` in `x` (if +#' `unionPeaks = FALSE`. All other spectrum data (such as retention time etc) +#' is taken from the *main* spectrum. #' #' @author Johannes Rainer, Sigurdur Smarason #' +#' @family spectra combination functions +#' #' @seealso #' #' [estimateMzScattering()] for a function to estimate m/z scattering @@ -904,7 +932,7 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33, #' intensity = ints3) #' #' ## Combine the spectra -#' sp_agg <- combineSpectra(list(sp1, sp2, sp3)) +#' sp_agg <- meanMzInts(list(sp1, sp2, sp3)) #' #' ## Plot the spectra before and after combining #' par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) @@ -913,11 +941,12 @@ descendPeak <- function(mz, intensity, peakIdx = NULL, signalPercentage = 33, #' points(mz(sp3), intensity(sp3), type = "h", col = "blue") #' plot(mz(sp_agg), intensity(sp_agg), xlim = range(mzs[5:25]), type = "h", #' col = "black") -combineSpectra <- function(x, mzFun = base::mean, intensityFun = base::mean, - main = floor(length(x) / 2L) + 1L, mzd, - timeDomain = TRUE) { +meanMzInts <- function(x, ..., intensityFun = base::mean, weighted = FALSE, + main = 1L, mzd, timeDomain = FALSE, unionPeaks = TRUE) { if (length(unique(unlist(lapply(x, function(z) z@msLevel)))) != 1) stop("Can only combine spectra with the same MS level") + if (main > length(x) || main < 1) + stop("'main' should be an integer between 1 and ", length(x)) mzs <- lapply(x, function(z) z@mz) mzs_lens <- base::lengths(mzs) mzs <- unlist(mzs, use.names = FALSE) @@ -928,20 +957,24 @@ combineSpectra <- function(x, mzFun = base::mean, intensityFun = base::mean, else mz_groups <- .group_mz_values(mzs, mzd = mzd) if (length(unique(mz_groups)) < length(x[[main]]@mz)) - stop("Got less m/z groups than m/z values in the original spectrum. ", - "Most likely the data is not profile-mode LCMS data.") - ## Want to keep only those groups with a m/z from the main spectrum. - ## vectorized version from @sgibb - is_in_main <- rep.int(seq_along(mzs_lens), mzs_lens)[mz_order] == main - keep <- mz_groups %in% mz_groups[is_in_main] - ## Keep only values for which a m/z in main is present. - mz_groups <- mz_groups[keep] - mzs <- mzs[keep] - ints <- unlist(base::lapply(x, function(z) z@intensity))[mz_order][keep] - ## Create result. + warning("Got less m/z groups than m/z values in the original spectrum.", + " Most likely the data is not profile-mode LCMS data or ", + "'mzd' is too large.") + ints <- unlist(base::lapply(x, function(z) z@intensity), + use.names = FALSE)[mz_order] new_sp <- x[[main]] + if (!unionPeaks) { + ## Want to keep only those groups with a m/z from the main spectrum. + ## vectorized version from @sgibb + is_in_main <- rep.int(seq_along(mzs_lens), mzs_lens)[mz_order] == main + keep <- mz_groups %in% mz_groups[is_in_main] + ## Keep only values for which a m/z in main is present. + mz_groups <- mz_groups[keep] + mzs <- mzs[keep] + ints <- ints[keep] + } ## Support also weighted.mean: - if (is.character(mzFun) && mzFun == "weighted.mean") { + if (weighted) { intsp <- split(ints, mz_groups) new_sp@mz <- base::mapply(split(mzs, mz_groups), intsp, FUN = function(mz_vals, w) @@ -950,14 +983,15 @@ combineSpectra <- function(x, mzFun = base::mean, intensityFun = base::mean, USE.NAMES = FALSE, SIMPLIFY = TRUE) new_sp@intensity <- base::vapply(intsp, FUN = intensityFun, FUN.VALUE = numeric(1), - USE.NAMES = FALSE) + USE.NAMES = FALSE, ...) } else { - new_sp@mz <- base::vapply(split(mzs, mz_groups), FUN = mzFun, - FUN.VALUE = numeric(1), USE.NAMES = FALSE) + new_sp@mz <- base::vapply(split(mzs, mz_groups), FUN = base::mean, + FUN.VALUE = numeric(1), USE.NAMES = FALSE, + ...) new_sp@intensity <- base::vapply(split(ints, mz_groups), FUN = intensityFun, FUN.VALUE = numeric(1), - USE.NAMES = FALSE) + USE.NAMES = FALSE, ...) } if (is.unsorted(new_sp@mz)) stop("m/z values of combined spectrum are not ordered") @@ -1055,3 +1089,119 @@ combineSpectra <- function(x, mzFun = base::mean, intensityFun = base::mean, ## plot = FALSE) ## h$mids[which.max(h$counts)] } + +#' @title Combine spectra to a consensus spectrum +#' +#' @description +#' +#' `consensusSpectrum` takes a list of spectra and combines them to a +#' consensus spectrum containing mass peaks that are present in a user +#' definable proportion of spectra. +#' +#' @details +#' +#' Peaks from spectra with a difference of their m/z being smaller than `mzd` +#' are grouped into the same final mass peak with their intensities being +#' aggregated with `intensityFun`. The m/z of the final mass peaks is calculated +#' using a intensity-weighted mean of the m/z values from the individual mass +#' peaks. +#' +#' @param x `list` of [Spectrum-class] objects (either [Spectrum1-class] or +#' [Spectrum2-class]). +#' +#' @param mzd `numeric(1)` defining the maximal m/z difference below which +#' mass peaks are grouped in to the same final mass peak (see details for +#' more information). If not provided this value is estimated from the +#' distribution of differences of m/z values from the spectra (see +#' [meanMzInts()] for more details). +#' +#' @param minProp `numeric(1)` defining the minimal proportion of spectra in +#' which a mass peak has to be present in order to include it in the +#' final consensus spectrum. Should be a number between 0 and 1 (present in +#' all spectra). +#' +#' @param intensityFun `function` to be used to define the intensity of the +#' aggregated peak. By default the maximum signal for a mass peak is +#' reported. +#' +#' @param ... additional arguments to be passed to `intensityFun`. +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @family spectra combination functions +#' +#' @examples +#' library(MSnbase) +#' ## Create 3 example spectra. +#' sp1 <- new("Spectrum2", rt = 1, precursorMz = 1.41, +#' mz = c(1.2, 1.5, 1.8, 3.6, 4.9, 5.0, 7.8, 8.4), +#' intensity = c(10, 3, 140, 14, 299, 12, 49, 20)) +#' sp2 <- new("Spectrum2", rt = 1.1, precursorMz = 1.4102, +#' mz = c(1.4, 1.81, 2.4, 4.91, 6.0, 7.2, 9), +#' intensity = c(3, 184, 8, 156, 12, 23, 10)) +#' sp3 <- new("Spectrum2", rt = 1.2, precursorMz = 1.409, +#' mz = c(1, 1.82, 2.2, 3, 7.0, 8), +#' intensity = c(8, 210, 7, 101, 17, 8)) +#' spl <- Spectra(sp1, sp2, sp3) +#' +#' ## Plot the spectra, each in a different color +#' par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) +#' plot(mz(sp1), intensity(sp1), type = "h", col = "#ff000080", lwd = 2, +#' xlab = "m/z", ylab = "intensity", xlim = range(mz(spl)), +#' ylim = range(intensity(spl))) +#' points(mz(sp2), intensity(sp2), type = "h", col = "#00ff0080", lwd = 2) +#' points(mz(sp3), intensity(sp3), type = "h", col = "#0000ff80", lwd = 2) +#' +#' cons <- consensusSpectrum(spl, mzd = 0.02, minProp = 2/3) +#' +#' ## Peaks of the consensus spectrum +#' mz(cons) +#' intensity(cons) +#' +#' ## Other Spectrum data is taken from the first Spectrum in the list +#' rtime(cons) +#' precursorMz(cons) +#' +#' plot(mz(cons), intensity(cons), type = "h", xlab = "m/z", ylab = "intensity", +#' xlim = range(mz(spl)), ylim = range(intensity(spl)), lwd = 2) +#' +consensusSpectrum <- function(x, mzd, minProp = 0.5, intensityFun = base::max, + ...) { + if (!is(x, "Spectra")) + x <- Spectra(x) + if (length(unique(msLevel(x))) != 1) + stop("Can only combine spectra with the same MS level") + xnew <- x[[1]] # data from the first peak. + mzs <- mz(x) + mzs_lens <- base::lengths(mzs) + mzs <- unlist(mzs, use.names = FALSE) + mz_order <- base::order(mzs) + mzs <- mzs[mz_order] + ints <- unlist(intensity(x), use.names = FALSE)[mz_order] + keep <- which(ints > 0) + mzs <- mzs[keep] + ints <- ints[keep] + mz_groups <- MSnbase:::.group_mz_values(mzs, mzd = mzd) + if (length(unique(mz_groups)) < sum(xnew@intensity > 0)) + warning("Got less m/z groups than m/z groups in the first spectrum. ", + "Most likely `mzd` is too large.") + mzs <- split(mzs, mz_groups) + ints <- split(ints, mz_groups) + keep <- lengths(mzs) >= (length(x) * minProp) + if (any(keep)) { + xnew@mz <- mapply(mzs[keep], ints[keep], FUN = function(mz_vals, w) + stats::weighted.mean(mz_vals, w + 1, na.rm = TRUE), + USE.NAMES = FALSE) + xnew@intensity <- base::vapply(ints[keep], FUN = intensityFun, + FUN.VALUE = numeric(1), + USE.NAMES = FALSE, ...) + } else { + warning("No peak present in more than ", minProp * 100, "% of spectra.") + xnew@mz <- numeric() + xnew@intensity <- numeric() + } + xnew@peaksCount <- length(xnew@mz) + if (validObject(xnew)) xnew +} diff --git a/R/methods-Spectra.R b/R/methods-Spectra.R index 5df5770de..e6193300d 100644 --- a/R/methods-Spectra.R +++ b/R/methods-Spectra.R @@ -410,6 +410,111 @@ setMethod("smooth", "Spectra", function(x, method = c("SavitzkyGolay", x }) +#' @rdname combineSpectra +#' +#' @aliases combineSpectra +#' +#' @title Combine Spectra +#' +#' `combineSpectra` combines spectra in a [MSnExp-class] or [Spectra-class] +#' object applying the summarization function `fun` to sets of spectra defined +#' by a factor (`fcol` parameter). The resulting combined spectrum for each +#' set contains metadata information (present in `mcols` and all spectrum +#' information other than `mz` and `intensity`) from the first spectrum in +#' each set. +#' +#' @param object A [MSnExp-class] or [Spectra-class] +#' +#' @param fcol For `Spectra` objects: `mcols` column name to be used to define +#' the sets of spectra to be combined. If missing, all spectra are +#' considered to be one set. +#' +#' @param fun `function` to be used to combine the spectra by `fcol`. Has to +#' be a function that takes a list of spectra as input and returns a single +#' [Spectrum-class]. See [meanMzInts()] for details.. +#' +#' @param ... additional arguments for `fun`. +#' +#' @return A `Spectra` or `MSnExp` object with combined spectra. Metadata +#' (`mcols`) and all spectrum attributes other than `mz` and `intensity` +#' are taken from the first `Spectrum` in each set. +#' +#' @md +#' +#' @author Johannes Rainer, Laurent Gatto +#' +#' @seealso [meanMzInts()] for a function to combine spectra. +#' +#' @examples +#' +#' set.seed(123) +#' mzs <- seq(1, 20, 0.1) +#' ints1 <- abs(rnorm(length(mzs), 10)) +#' ints1[11:20] <- c(15, 30, 90, 200, 500, 300, 100, 70, 40, 20) # add peak +#' ints2 <- abs(rnorm(length(mzs), 10)) +#' ints2[11:20] <- c(15, 30, 60, 120, 300, 200, 90, 60, 30, 23) +#' ints3 <- abs(rnorm(length(mzs), 10)) +#' ints3[11:20] <- c(13, 20, 50, 100, 200, 100, 80, 40, 30, 20) +#' +#' ## Create the spectra. +#' sp1 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), +#' intensity = ints1, rt = 1) +#' sp2 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), +#' intensity = ints2, rt = 2) +#' sp3 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.009), +#' intensity = ints3, rt = 3) +#' +#' spctra <- Spectra(sp1, sp2, sp3, +#' elementMetadata = DataFrame(idx = 1:3, group = c("b", "a", "a"))) +#' +#' ## Combine the spectra reporting the maximym signal +#' res <- combineSpectra(spctra, mzd = 0.05, intensityFun = max) +#' res +#' +#' ## All values other than m/z and intensity are kept from the first spectrum +#' rtime(res) +#' +#' ## Plot the individual and the merged spectrum +#' par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) +#' plot(mz(sp1), intensity(sp1), xlim = range(mzs[5:25]), type = "h", col = "red") +#' points(mz(sp2), intensity(sp2), type = "h", col = "green") +#' points(mz(sp3), intensity(sp3), type = "h", col = "blue") +#' plot(mz(res[[1]]), intensity(res[[1]]), type = "h", +#' col = "black", xlim = range(mzs[5:25])) +#' +#' ## Combine spectra in two sets. +#' res <- combineSpectra(spctra, fcol = "group", mzd = 0.05) +#' res +#' +#' rtime(res) +#' +#' ## Plot the individual and the merged spectra +#' par(mfrow = c(3, 1), mar = c(4.3, 4, 1, 1)) +#' plot(mz(sp1), intensity(sp1), xlim = range(mzs[5:25]), type = "h", col = "red") +#' points(mz(sp2), intensity(sp2), type = "h", col = "green") +#' points(mz(sp3), intensity(sp3), type = "h", col = "blue") +#' plot(mz(res[[1]]), intensity(res[[1]]), xlim = range(mzs[5:25]), type = "h", +#' col = "black") +#' plot(mz(res[[2]]), intensity(res[[2]]), xlim = range(mzs[5:25]), type = "h", +#' col = "black") +setMethod("combineSpectra", "Spectra", function(object, fcol, + fun = meanMzInts, ...) { + if (missing(fcol)) { + .by <- factor(rep(1, length(object))) + } else { + if (!any(fcol %in% colnames(mcols(object)))) + stop("'fcol' does not match any column names of 'mcols(object)'") + .by <- factor(mcols(object)[, fcol], + levels = unique(mcols(object)[, fcol])) + } + Spectra( + lapply(split(object, .by), FUN = fun, ...), + elementMetadata = mcols(object, use.names = FALSE)[ + levelIndex(.by, which = "first"), , drop = FALSE + ]) +}) + + ## Still to implement: ## quantify, method = c("trapezoidation", "max", "sum", reporters, strict = FALSE) ## normalize, method = c("max", "sum", "precursor", precursorIntensity) diff --git a/R/utils.R b/R/utils.R index 2c2d18961..2dd2183ff 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1374,3 +1374,49 @@ hasSpectra <- function(files) { hasChromatograms <- function(files) { sapply(files, mzR:::.hasChromatograms) } + +#' @title Get the index of the particular element for each level +#' +#' `levelIndex` returns the index of the first, middle or last element for +#' each level of a factor within the factor. +#' +#' @param x `factor` or `vector` that can be converted into a `factor` +#' +#' @param which `character` defining for which element the index should be +#' returned, can be either `"first"`, `"middle"` or `"last"`. +#' +#' @return `integer` same length than `levels(x)` with the index for each +#' level in `x`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +#' +#' @examples +#' +#' f <- factor(c("a", "a", "b", "a", "b", "c", "c", "b", "d", "d", "d")) +#' f +#' +#' levelIndex(f, which = "first") +#' levelIndex(f, which = "middle") +#' levelIndex(f, which = "last") +#' +#' f <- factor(c("a", "a", "b", "a", "b", "c", "c", "b", "d", "d", "d"), +#' levels = c("d", "a", "c", "b")) +#' levelIndex(f, which = "first") +#' levelIndex(f, which = "middle") +#' levelIndex(f, which = "last") +levelIndex <- function(x, which = c("first", "middle", "last")) { + x <- as.factor(x) + res <- switch(match.arg(which), + "first" = match(levels(x), x), + "last" = length(x) - match(levels(x), rev(x)) + 1L, + "middle" = vapply(levels(x), function(z) { + idx <- which(x == z) + idx[ceiling(length(idx) / 2L)] + }, integer(1), USE.NAMES = FALSE)) + names(res) <- levels(x) + res +} diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index 141eab04a..aacc6706b 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -1,79 +1,50 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/functions-Spectrum.R -\name{combineSpectra} +% Please edit documentation in R/methods-Spectra.R +\docType{methods} +\name{combineSpectra,Spectra-method} +\alias{combineSpectra,Spectra-method} \alias{combineSpectra} -\title{Combine profile-mode spectra signals} +\title{Combine Spectra + +\code{combineSpectra} combines spectra in a \linkS4class{MSnExp} or \linkS4class{Spectra} +object applying the summarization function \code{fun} to sets of spectra defined +by a factor (\code{fcol} parameter). The resulting combined spectrum for each +set contains metadata information (present in \code{mcols} and all spectrum +information other than \code{mz} and \code{intensity}) from the first spectrum in +each set.} \usage{ -combineSpectra(x, mzFun = base::mean, intensityFun = base::mean, - main = floor(length(x)/2L) + 1L, mzd, timeDomain = TRUE) +\S4method{combineSpectra}{Spectra}(object, fcol, fun = meanMzInts, ...) } \arguments{ -\item{x}{\code{list} of \code{Spectrum} objects.} - -\item{mzFun}{\code{function} to aggregate the m/z values per m/z group. Should be -a function or the name of a function. The function is expected to -return a \code{numeric(1)}. For \code{mzFun = "weighted.mean"} (note -that the \emph{name} of the function is passed!) the resulting m/z is -determined as an intensity-weighted mean of spectras' m/z values.} +\item{object}{A \linkS4class{MSnExp} or \linkS4class{Spectra}} -\item{intensityFun}{\code{function} to aggregate the intensity values per m/z -group. Should be a function or the name of a function. The function is -expected to return a \code{numeric(1)}.} +\item{fcol}{For \code{Spectra} objects: \code{mcols} column name to be used to define +the sets of spectra to be combined. If missing, all spectra are +considered to be one set.} -\item{main}{\code{integer(1)} defining the \emph{main} spectrum, i.e. the spectrum -which m/z and intensity values get replaced and is returned.} +\item{fun}{\code{function} to be used to combine the spectra by \code{fcol}. Has to +be a function that takes a list of spectra as input and returns a single +\linkS4class{Spectrum}. See \code{\link[=meanMzInts]{meanMzInts()}} for details..} -\item{mzd}{\code{numeric(1)} defining the maximal m/z difference below which -values are grouped. If not provided, this value is estimated from the -distribution of differences of m/z values from the provided spectra -(see details).} - -\item{timeDomain}{\code{logical(1)} whether definition of the m/z values to be -combined into one m/z is performed on m/z values -(\code{timeDomain = FALSE}) or on \code{sqrt(mz)} (\code{timeDomain = TRUE}). -Profile data from TOF MS instruments should be aggregated based -on the time domain (see details). Note that a pre-defined \code{mzd} should -also be estimated on the square root of m/z values if -\code{timeDomain = TRUE}.} +\item{...}{additional arguments for \code{fun}.} } \value{ -\code{Spectrum} with m/z and intensity values representing the aggregated values -across the provided spectra. The returned spectrum contains the same number -of m/z and intensity pairs than the spectrum with index \code{main} in \code{x}, also -all other related information is taken from this spectrum. +A \code{Spectra} or \code{MSnExp} object with combined spectra. Metadata +(\code{mcols}) and all spectrum attributes other than \code{mz} and \code{intensity} +are taken from the first \code{Spectrum} in each set. } \description{ -Combine signals from profile-mode spectra of consecutive scans into the -values for the \emph{main} spectrum. This can improve centroiding of -profile-mode data by increasing the signal-to-noise ratio. -} -\details{ -The m/z values of the same ion in consecutive scans (spectra) of a LCMS run -will not be identical. Assuming that this random variation is much smaller -than the resolution of the MS instrument (i.e. the difference between -m/z values within each single spectrum), m/z value groups are defined -across the spectra and those containing m/z values of the \code{main} spectrum -are retained. The maximum allowed difference between m/z values for the -same ion is estimated as in \code{\link[=estimateMzScattering]{estimateMzScattering()}}. Alternatively it is -possible to define this maximal m/z difference with the \code{mzd} parameter. -All m/z values with a difference smaller than this value are combined to -a m/z group. -Intensities and m/z values falling within each of these m/z groups are -aggregated using the \code{intensity_fun} and \code{mz_fun}, respectively. It is -highly likely that all QTOF profile data is collected with a timing circuit -that collects data points with regular intervals of time that are then later -converted into m/z values based on the relationship \code{t = k * sqrt(m/z)}. The -m/z scale is thus non-linear and the m/z scattering (which is in fact caused -by small variations in the time circuit) will thus be different in the lower -and upper m/z scale. m/z-intensity pairs from consecutive scans to be -combined are therefore defined by default on the square root of the m/z -values. With \code{timeDomain = FALSE}, the actual m/z values will be used. +Combine Spectra + +\code{combineSpectra} combines spectra in a \linkS4class{MSnExp} or \linkS4class{Spectra} +object applying the summarization function \code{fun} to sets of spectra defined +by a factor (\code{fcol} parameter). The resulting combined spectrum for each +set contains metadata information (present in \code{mcols} and all spectrum +information other than \code{mz} and \code{intensity}) from the first spectrum in +each set. } \examples{ -library(MSnbase) -## Create 3 example profile-mode spectra with a resolution of 0.1 and small -## random variations on these m/z values on consecutive scans. set.seed(123) mzs <- seq(1, 20, 0.1) ints1 <- abs(rnorm(length(mzs), 10)) @@ -85,33 +56,49 @@ ints3[11:20] <- c(13, 20, 50, 100, 200, 100, 80, 40, 30, 20) ## Create the spectra. sp1 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), - intensity = ints1) + intensity = ints1, rt = 1) sp2 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), - intensity = ints2) + intensity = ints2, rt = 2) sp3 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.009), - intensity = ints3) + intensity = ints3, rt = 3) + +spctra <- Spectra(sp1, sp2, sp3, + elementMetadata = DataFrame(idx = 1:3, group = c("b", "a", "a"))) -## Combine the spectra -sp_agg <- combineSpectra(list(sp1, sp2, sp3)) +## Combine the spectra reporting the maximym signal +res <- combineSpectra(spctra, mzd = 0.05, intensityFun = max) +res -## Plot the spectra before and after combining +## All values other than m/z and intensity are kept from the first spectrum +rtime(res) + +## Plot the individual and the merged spectrum par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) plot(mz(sp1), intensity(sp1), xlim = range(mzs[5:25]), type = "h", col = "red") points(mz(sp2), intensity(sp2), type = "h", col = "green") points(mz(sp3), intensity(sp3), type = "h", col = "blue") -plot(mz(sp_agg), intensity(sp_agg), xlim = range(mzs[5:25]), type = "h", +plot(mz(res[[1]]), intensity(res[[1]]), type = "h", + col = "black", xlim = range(mzs[5:25])) + +## Combine spectra in two sets. +res <- combineSpectra(spctra, fcol = "group", mzd = 0.05) +res + +rtime(res) + +## Plot the individual and the merged spectra +par(mfrow = c(3, 1), mar = c(4.3, 4, 1, 1)) +plot(mz(sp1), intensity(sp1), xlim = range(mzs[5:25]), type = "h", col = "red") +points(mz(sp2), intensity(sp2), type = "h", col = "green") +points(mz(sp3), intensity(sp3), type = "h", col = "blue") +plot(mz(res[[1]]), intensity(res[[1]]), xlim = range(mzs[5:25]), type = "h", + col = "black") +plot(mz(res[[2]]), intensity(res[[2]]), xlim = range(mzs[5:25]), type = "h", col = "black") } \seealso{ -\code{\link[=estimateMzScattering]{estimateMzScattering()}} for a function to estimate m/z scattering -in consecutive scans. - -\code{\link[=estimateMzResolution]{estimateMzResolution()}} for a function estimating the m/z resolution of -a spectrum. - -\code{\link[=combineSpectraMovingWindow]{combineSpectraMovingWindow()}} for the function to combine consecutive -spectra of an \code{MSnExp} object using a moving window approach. +\code{\link[=meanMzInts]{meanMzInts()}} for a function to combine spectra. } \author{ -Johannes Rainer, Sigurdur Smarason +Johannes Rainer, Laurent Gatto } diff --git a/man/combineSpectraMovingWindow.Rd b/man/combineSpectraMovingWindow.Rd index 510773506..fccd7b19c 100644 --- a/man/combineSpectraMovingWindow.Rd +++ b/man/combineSpectraMovingWindow.Rd @@ -4,9 +4,9 @@ \alias{combineSpectraMovingWindow} \title{Combine signal from consecutive spectra of LCMS experiments} \usage{ -combineSpectraMovingWindow(x, halfWindowSize = 1L, mzFun = base::mean, - intensityFun = base::mean, mzd = NULL, timeDomain = TRUE, - BPPARAM = bpparam()) +combineSpectraMovingWindow(x, halfWindowSize = 1L, + intensityFun = base::mean, mzd = NULL, timeDomain = FALSE, + weighted = FALSE, BPPARAM = bpparam()) } \arguments{ \item{x}{\code{MSnExp} or \code{OnDiskMSnExp} object.} @@ -14,20 +14,15 @@ combineSpectraMovingWindow(x, halfWindowSize = 1L, mzFun = base::mean, \item{halfWindowSize}{\code{integer(1)} with the half window size for the moving window.} -\item{mzFun}{\code{function} to aggregate the m/z values per m/z group. Should be -a function or the name of a function. The function is expected to -return a \code{numeric(1)}. For \code{mzFun = "weighted.mean"} (note -that the \emph{name} of the function is passed!) the resulting m/z is -determined as an intensity-weighted mean of spectras' m/z values.} - \item{intensityFun}{\code{function} to aggregate the intensity values per m/z group. Should be a function or the name of a function. The function is expected to return a \code{numeric(1)}.} \item{mzd}{\code{numeric(1)} defining the maximal m/z difference below which -values are grouped. If not provided, this value is estimated from the -distribution of differences of m/z values from the provided spectra -(see details).} +mass peaks are considered to represent the same ion/mass peak. +Intensity values for such grouped mass peaks are aggregated. If not +specified this value is estimated from the distribution of differences +of m/z values from the provided spectra (see details).} \item{timeDomain}{\code{logical(1)} whether definition of the m/z values to be combined into one m/z is performed on m/z values @@ -37,6 +32,10 @@ on the time domain (see details). Note that a pre-defined \code{mzd} should also be estimated on the square root of m/z values if \code{timeDomain = TRUE}.} +\item{weighted}{\code{logical(1)} whether m/z values per m/z group should be +aggregated with an intensity-weighted mean. The default is to report +the mean m/z.} + \item{BPPARAM}{parallel processing settings.} } \value{ @@ -59,7 +58,7 @@ data) and thus combines their signal which can increase the increase the signal to noise ratio. Intensities (and m/z values) for signals with the same m/z value in -consecutive scans are aggregated using the \code{intensityFun} and \code{mzFun}. +consecutive scans are aggregated using the \code{intensityFun}. m/z values of intensities from consecutive scans will never be exactly identical, even if they represent signal from the same ion. The function determines thus internally a similarity threshold based on differences @@ -68,7 +67,7 @@ considered to derive from the same ion. For robustness reasons, this threshold is estimated on the 100 spectra with the largest number of m/z - intensity pairs (i.e. mass peaks). -See \code{\link[=combineSpectra]{combineSpectra()}} for details. +See \code{\link[=meanMzInts]{meanMzInts()}} for details. Parameter \code{timeDomain}: by default, m/z-intensity pairs from consecutive scans to be aggregated are defined based on the square root of the m/z @@ -125,7 +124,7 @@ plot(chr_comb) ## Chromatographic data is "smoother" after combining. } \seealso{ -\code{\link[=combineSpectra]{combineSpectra()}} for the function combining spectra provided in +\code{\link[=meanMzInts]{meanMzInts()}} for the function combining spectra provided in a \code{list}. \code{\link[=estimateMzScattering]{estimateMzScattering()}} for a function to estimate m/z value scattering in diff --git a/man/consensusSpectrum.Rd b/man/consensusSpectrum.Rd new file mode 100644 index 000000000..4aa50e912 --- /dev/null +++ b/man/consensusSpectrum.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Spectrum.R +\name{consensusSpectrum} +\alias{consensusSpectrum} +\title{Combine spectra to a consensus spectrum} +\usage{ +consensusSpectrum(x, mzd, minProp = 0.5, intensityFun = base::max, ...) +} +\arguments{ +\item{x}{\code{list} of \linkS4class{Spectrum} objects (either \linkS4class{Spectrum1} or +\linkS4class{Spectrum2}).} + +\item{mzd}{\code{numeric(1)} defining the maximal m/z difference below which +mass peaks are grouped in to the same final mass peak (see details for +more information). If not provided this value is estimated from the +distribution of differences of m/z values from the spectra (see +\code{\link[=meanMzInts]{meanMzInts()}} for more details).} + +\item{minProp}{\code{numeric(1)} defining the minimal proportion of spectra in +which a mass peak has to be present in order to include it in the +final consensus spectrum. Should be a number between 0 and 1 (present in +all spectra).} + +\item{intensityFun}{\code{function} to be used to define the intensity of the +aggregated peak. By default the maximum signal for a mass peak is +reported.} + +\item{...}{additional arguments to be passed to \code{intensityFun}.} +} +\description{ +\code{consensusSpectrum} takes a list of spectra and combines them to a +consensus spectrum containing mass peaks that are present in a user +definable proportion of spectra. +} +\details{ +Peaks from spectra with a difference of their m/z being smaller than \code{mzd} +are grouped into the same final mass peak with their intensities being +aggregated with \code{intensityFun}. The m/z of the final mass peaks is calculated +using a intensity-weighted mean of the m/z values from the individual mass +peaks. +} +\examples{ +library(MSnbase) +## Create 3 example spectra. +sp1 <- new("Spectrum2", rt = 1, precursorMz = 1.41, + mz = c(1.2, 1.5, 1.8, 3.6, 4.9, 5.0, 7.8, 8.4), + intensity = c(10, 3, 140, 14, 299, 12, 49, 20)) +sp2 <- new("Spectrum2", rt = 1.1, precursorMz = 1.4102, + mz = c(1.4, 1.81, 2.4, 4.91, 6.0, 7.2, 9), + intensity = c(3, 184, 8, 156, 12, 23, 10)) +sp3 <- new("Spectrum2", rt = 1.2, precursorMz = 1.409, + mz = c(1, 1.82, 2.2, 3, 7.0, 8), + intensity = c(8, 210, 7, 101, 17, 8)) +spl <- Spectra(sp1, sp2, sp3) + +## Plot the spectra, each in a different color +par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) +plot(mz(sp1), intensity(sp1), type = "h", col = "#ff000080", lwd = 2, + xlab = "m/z", ylab = "intensity", xlim = range(mz(spl)), + ylim = range(intensity(spl))) +points(mz(sp2), intensity(sp2), type = "h", col = "#00ff0080", lwd = 2) +points(mz(sp3), intensity(sp3), type = "h", col = "#0000ff80", lwd = 2) + +cons <- consensusSpectrum(spl, mzd = 0.02, minProp = 2/3) + +## Peaks of the consensus spectrum +mz(cons) +intensity(cons) + +## Other Spectrum data is taken from the first Spectrum in the list +rtime(cons) +precursorMz(cons) + +plot(mz(cons), intensity(cons), type = "h", xlab = "m/z", ylab = "intensity", + xlim = range(mz(spl)), ylim = range(intensity(spl)), lwd = 2) + +} +\seealso{ +Other spectra combination functions: \code{\link{meanMzInts}} +} +\author{ +Johannes Rainer +} +\concept{spectra combination functions} diff --git a/man/meanMzInts.Rd b/man/meanMzInts.Rd new file mode 100644 index 000000000..8441b30cb --- /dev/null +++ b/man/meanMzInts.Rd @@ -0,0 +1,144 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Spectrum.R +\name{meanMzInts} +\alias{meanMzInts} +\title{Combine a list of spectra to a single spectrum} +\usage{ +meanMzInts(x, ..., intensityFun = base::mean, weighted = FALSE, + main = 1L, mzd, timeDomain = FALSE, unionPeaks = TRUE) +} +\arguments{ +\item{x}{\code{list} of \code{Spectrum} objects.} + +\item{...}{additional parameters that are passed to \code{intensityFun}.} + +\item{intensityFun}{\code{function} to aggregate the intensity values per m/z +group. Should be a function or the name of a function. The function is +expected to return a \code{numeric(1)}.} + +\item{weighted}{\code{logical(1)} whether m/z values per m/z group should be +aggregated with an intensity-weighted mean. The default is to report +the mean m/z.} + +\item{main}{\code{integer(1)} defining the \emph{main} spectrum, i.e. the spectrum +which m/z and intensity values get replaced and is returned. By default +the \emph{first} spectrum in \code{x} is used.} + +\item{mzd}{\code{numeric(1)} defining the maximal m/z difference below which +mass peaks are considered to represent the same ion/mass peak. +Intensity values for such grouped mass peaks are aggregated. If not +specified this value is estimated from the distribution of differences +of m/z values from the provided spectra (see details).} + +\item{timeDomain}{\code{logical(1)} whether definition of the m/z values to be +combined into one m/z is performed on m/z values +(\code{timeDomain = FALSE}) or on \code{sqrt(mz)} (\code{timeDomain = TRUE}). +Profile data from TOF MS instruments should be aggregated based +on the time domain (see details). Note that a pre-defined \code{mzd} should +also be estimated on the square root of m/z values if +\code{timeDomain = TRUE}.} + +\item{unionPeaks}{\code{logical(1)} whether the union of all peaks (peak groups) +from all spectra are reported or only peak groups that contain peaks +that are present in the \emph{main} spectrum (defined by \code{main}). The default +is to report the union of peaks from all spectra.} +} +\value{ +\code{Spectrum} with m/z and intensity values representing the aggregated values +across the provided spectra. The returned spectrum contains the union of +all peaks from all spectra (if \code{unionPeaks = TRUE}), or the same number of +m/z and intensity pairs than the spectrum with index \code{main} in \code{x} (if +\code{unionPeaks = FALSE}. All other spectrum data (such as retention time etc) +is taken from the \emph{main} spectrum. +} +\description{ +Combine peaks from several spectra into a single spectrum. Intensity and +m/z values from the input spectra are aggregated into a single peak if +the difference between their m/z values is smaller than \code{mzd}.Intensity +values are aggregated with the \code{intensityFun}, m/z values by the mean, or +intensity weighted mean if \code{weighted = TRUE}. +} +\details{ +For general merging of spectra, the \code{mzd} should be manually specified based +on the precision of the MS instrument. Peaks from spectra with a difference +in their m/z being smaller than \code{mzd} are grouped into the same final peak +with their intensities being aggregated with the \code{intensityFun} function. + +Some details for the combination of consecutive spectra of an LCMS run: + +The m/z values of the same ion in consecutive scans (spectra) of a LCMS run +will not be identical. Assuming that this random variation is much smaller +than the resolution of the MS instrument (i.e. the difference between +m/z values within each single spectrum), m/z value groups are defined +across the spectra and those containing m/z values of the \code{main} spectrum +are retained. The maximum allowed difference between m/z values for the +same ion is estimated as in \code{\link[=estimateMzScattering]{estimateMzScattering()}}. Alternatively it is +possible to define this maximal m/z difference with the \code{mzd} parameter. +All m/z values with a difference smaller than this value are combined to +a m/z group. +Intensities and m/z values falling within each of these m/z groups are +aggregated using the \code{intensity_fun} and \code{mz_fun}, respectively. It is +highly likely that all QTOF profile data is collected with a timing circuit +that collects data points with regular intervals of time that are then later +converted into m/z values based on the relationship \code{t = k * sqrt(m/z)}. The +m/z scale is thus non-linear and the m/z scattering (which is in fact caused +by small variations in the time circuit) will thus be different in the lower +and upper m/z scale. m/z-intensity pairs from consecutive scans to be +combined are therefore defined by default on the square root of the m/z +values. With \code{timeDomain = FALSE}, the actual m/z values will be used. +} +\note{ +This allows e.g. to combine profile-mode spectra of consecutive scans into +the values for the \emph{main} spectrum. This can improve centroiding of +profile-mode data by increasing the signal-to-noise ratio and is used in the +\code{\link[=combineSpectraMovingWindow]{combineSpectraMovingWindow()}} function. +} +\examples{ + +library(MSnbase) +## Create 3 example profile-mode spectra with a resolution of 0.1 and small +## random variations on these m/z values on consecutive scans. +set.seed(123) +mzs <- seq(1, 20, 0.1) +ints1 <- abs(rnorm(length(mzs), 10)) +ints1[11:20] <- c(15, 30, 90, 200, 500, 300, 100, 70, 40, 20) # add peak +ints2 <- abs(rnorm(length(mzs), 10)) +ints2[11:20] <- c(15, 30, 60, 120, 300, 200, 90, 60, 30, 23) +ints3 <- abs(rnorm(length(mzs), 10)) +ints3[11:20] <- c(13, 20, 50, 100, 200, 100, 80, 40, 30, 20) + +## Create the spectra. +sp1 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), + intensity = ints1) +sp2 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), + intensity = ints2) +sp3 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.009), + intensity = ints3) + +## Combine the spectra +sp_agg <- meanMzInts(list(sp1, sp2, sp3)) + +## Plot the spectra before and after combining +par(mfrow = c(2, 1), mar = c(4.3, 4, 1, 1)) +plot(mz(sp1), intensity(sp1), xlim = range(mzs[5:25]), type = "h", col = "red") +points(mz(sp2), intensity(sp2), type = "h", col = "green") +points(mz(sp3), intensity(sp3), type = "h", col = "blue") +plot(mz(sp_agg), intensity(sp_agg), xlim = range(mzs[5:25]), type = "h", + col = "black") +} +\seealso{ +\code{\link[=estimateMzScattering]{estimateMzScattering()}} for a function to estimate m/z scattering +in consecutive scans. + +\code{\link[=estimateMzResolution]{estimateMzResolution()}} for a function estimating the m/z resolution of +a spectrum. + +\code{\link[=combineSpectraMovingWindow]{combineSpectraMovingWindow()}} for the function to combine consecutive +spectra of an \code{MSnExp} object using a moving window approach. + +Other spectra combination functions: \code{\link{consensusSpectrum}} +} +\author{ +Johannes Rainer, Sigurdur Smarason +} +\concept{spectra combination functions} diff --git a/tests/testthat/test_MSnExp.R b/tests/testthat/test_MSnExp.R index 163b60c5c..c3d3f4c5a 100644 --- a/tests/testthat/test_MSnExp.R +++ b/tests/testthat/test_MSnExp.R @@ -543,7 +543,8 @@ test_that("combineSpectraMovingWindow works", { spctrl <- spectra(od[(idx -1):(idx + 1)]) od <- od[(idx - 3):(idx + 3)] - spctr <- combineSpectra(spctrl) + spctr <- meanMzInts(spctrl, timeDomain = TRUE, unionPeaks = FALSE, + main = 2L) ## Should be different from raw ones expect_true(is.character(all.equal(mz(spctr), mz(od[[4]])))) expect_true(is.character(all.equal(intensity(spctr), intensity(od[[4]])))) @@ -551,16 +552,18 @@ test_that("combineSpectraMovingWindow works", { ## Use pre-calculated mzd: mzd <- estimateMzScattering(od) ## If mzd is estimated on mz and combination on sqrt(mz) it will fail. - expect_error(od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]])) + expect_warning(od_comb <- combineSpectraMovingWindow( + od, mzd = mzd[[4]], timeDomain = TRUE)) ## All on m/z scale - od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]], timeDomain = FALSE) + od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]], + timeDomain = FALSE) spctr_comb <- od_comb[[4]] expect_equal(mz(spctr_comb), mz(spctr)) expect_equal(intensity(spctr_comb), intensity(spctr)) ## Estimate on the sqrt(mz) mzd <- estimateMzScattering(od, timeDomain = TRUE) - od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]]) + od_comb <- combineSpectraMovingWindow(od, mzd = mzd[[4]], timeDomain = TRUE) spctr_comb <- od_comb[[4]] expect_equal(mz(spctr_comb), mz(spctr)) expect_equal(intensity(spctr_comb), intensity(spctr)) @@ -575,7 +578,11 @@ test_that("combineSpectraMovingWindow works", { expect_equal(mz(spctr_comb), mz(spctr)) expect_equal(intensity(spctr_comb), intensity(spctr)) - + od_comb_w <- combineSpectraMovingWindow(od, weighted = TRUE) + spctr_w <- od_comb_w[[4]] + expect_true(sum(mz(spctr_comb) != mz(spctr_w)) > length(mz(spctr_w))/2) + expect_equal(intensity(spctr_comb), intensity(spctr_w)) + expect_equal(length(od), length(od_comb)) expect_equal(peaksCount(od), peaksCount(od_comb)) }) diff --git a/tests/testthat/test_Spectrum.R b/tests/testthat/test_Spectrum.R index 72e0f1984..2172ddfdf 100644 --- a/tests/testthat/test_Spectrum.R +++ b/tests/testthat/test_Spectrum.R @@ -530,7 +530,7 @@ test_that(".group_mz_values works", { expect_true(sum(res == 2) == 2) }) -test_that("combineSpectra works", { +test_that("meanMzInts works", { set.seed(123) mzs <- seq(1, 20, 0.1) mzs_2 <- c(mzs, 20.1) @@ -550,37 +550,50 @@ test_that("combineSpectra works", { intensity = ints3, rt = 3) sp4 <- new("Spectrum2", mz = mzs + rnorm(length(mzs), sd = 0.3), intensity = ints2, rt = 4) - expect_error(combineSpectra(list(sp1, sp2, sp3, sp4))) + expect_error(meanMzInts(list(sp1, sp2, sp3, sp4))) + expect_error(meanMzInts(list(sp1, sp2, sp3), main = 5)) + + res <- meanMzInts(list(sp1, sp2, sp3), timeDomain = TRUE, + unionPeaks = FALSE) + expect_equal(length(mz(res)), length(mz(sp1))) + expect_equal(rtime(res), rtime(sp1)) + + res <- meanMzInts(list(sp1, sp2, sp3), timeDomain = TRUE, + unionPeaks = TRUE, main = 2) + expect_true(length(mz(res)) > length(mz(sp2))) + expect_equal(rtime(res), rtime(sp2)) - res <- combineSpectra(list(sp1, sp2, sp3), timeDomain = TRUE) + res <- meanMzInts(list(sp2, sp1), timeDomain = FALSE, unionPeaks = FALSE) expect_equal(length(mz(res)), length(mz(sp2))) expect_equal(rtime(res), rtime(sp2)) - res <- combineSpectra(list(sp2, sp1), timeDomain = FALSE) + res <- meanMzInts(list(sp2, sp1), timeDomain = FALSE, unionPeaks = FALSE, + main = 2) expect_equal(length(mz(res)), length(mz(sp1))) expect_equal(rtime(res), rtime(sp1)) - + sp4 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.3), intensity = ints2, rt = 4) ## randon noise larger than resolution. - expect_error(res <- combineSpectra(list(sp1, sp3, sp4))) + expect_warning(res <- meanMzInts(list(sp1, sp3, sp4))) - res <- combineSpectra(list(sp1, sp2, sp3), main = 1, timeDomain = TRUE) + res <- meanMzInts(list(sp1, sp2, sp3), main = 1, timeDomain = TRUE, + unionPeaks = FALSE) expect_equal(rtime(res), rtime(sp1)) expect_equal(length(mz(res)), length(mz(sp1))) - res <- combineSpectra(list(sp1, sp2, sp3), main = 3, timeDomain = TRUE) + res <- meanMzInts(list(sp1, sp2, sp3), main = 3, timeDomain = TRUE, + unionPeaks = FALSE) expect_equal(rtime(res), rtime(sp3)) expect_equal(length(mz(res)), length(mz(sp3))) - res <- combineSpectra(list(sp1, sp1), intensityFun = sum, timeDomain = TRUE) + res <- meanMzInts(list(sp1, sp1), intensityFun = sum, + timeDomain = TRUE, unionPeaks = TRUE) expect_equal(mz(res), mz(sp1)) expect_equal(intensity(res), intensity(sp1) * 2) - ## Use character mzFun: - expect_error(combineSpectra(list(sp1, sp2, sp3), mzFun = "meani")) - res <- combineSpectra(list(sp1, sp2, sp3), mzFun = base::mean) - res2 <- combineSpectra(list(sp1, sp2, sp3), mzFun = "weighted.mean") + res <- meanMzInts(list(sp1, sp2, sp3)) + res2 <- meanMzInts(list(sp1, sp2, sp3), weighted = TRUE) expect_equal(intensity(res), intensity(res2)) expect_false(all(mz(res) == mz(res2))) @@ -588,18 +601,24 @@ test_that("combineSpectra works", { od1 <- filterFile(sciex, 1) lst <- spectra(od1[3:5]) - res <- combineSpectra(lst, timeDomain = TRUE) - res_2 <- combineSpectra(lst, timeDomain = FALSE) + res <- meanMzInts(lst, timeDomain = TRUE, main = 2) + res_2 <- meanMzInts(lst, timeDomain = FALSE, main = 2) expect_equal(mz(res), mz(res_2)) expect_equal(intensity(res), intensity(res_2)) ## with (wrongly) pre-calculated mzd mzd <- MSnbase:::.estimate_mz_scattering(sort(unlist(lapply(lst, mz)))) - expect_error(combineSpectra(lst, timeDomain = TRUE, mzd = mzd)) - res_3 <- combineSpectra(lst, timeDomain = FALSE, mzd = mzd) + expect_warning(meanMzInts(lst, timeDomain = TRUE, mzd = mzd)) + res_3 <- meanMzInts(lst, timeDomain = FALSE, mzd = mzd, main = 2) expect_equal(mz(res), mz(res_3)) expect_equal(intensity(res), intensity(res_3)) + + ## Difference between unionPeaks = TRUE and FALSE + res <- meanMzInts(lst, unionPeaks = FALSE, main = 2) + res_2 <- meanMzInts(lst, unionPeaks = TRUE, main = 2) + expect_true(length(mz(res)) < length(mz(res_2))) + mzs <- unique(unlist(lapply(lst, mz))) }) test_that(".estimate_mz_resolution, estimateMzResolution,Spectrum works", { @@ -632,3 +651,24 @@ test_that(".density works", { expect_equal(res$y, res_2$y) }) +test_that("consensusSpectrum works", { + sp1 <- new("Spectrum2", rt = 1, precursorMz = 1.41, + mz = c(1.2, 1.5, 1.8, 3.6, 4.9, 5.0, 7.8, 8.4), + intensity = c(10, 3, 140, 14, 299, 12, 49, 20)) + sp2 <- new("Spectrum2", rt = 1.1, precursorMz = 1.4102, + mz = c(1.4, 1.81, 2.4, 4.91, 6.0, 7.2, 9), + intensity = c(3, 184, 8, 156, 12, 23, 10)) + sp3 <- new("Spectrum2", rt = 1.2, precursorMz = 1.409, + mz = c(1, 1.82, 2.2, 3, 7.0, 8), + intensity = c(8, 210, 7, 101, 17, 8)) + spl <- Spectra(sp1, sp2, sp3) + + expect_error(consensusSpectrum(4)) + cons <- consensusSpectrum(spl, mzd = 0.02) + expect_true(is(cons, "Spectrum2")) + expect_equal(length(mz(cons)), 2) + expect_equal(rtime(cons), rtime(sp1)) + + cons <- consensusSpectrum(spl, mzd = 0.02, minProp = 1/3) + expect_equal(peaksCount(cons), 18) +}) diff --git a/tests/testthat/test_methods-Spectra.R b/tests/testthat/test_methods-Spectra.R index b2d230f89..cd00f9553 100644 --- a/tests/testthat/test_methods-Spectra.R +++ b/tests/testthat/test_methods-Spectra.R @@ -244,3 +244,54 @@ test_that("pickPeaks,Spectra and smooth,Spectra works", { expect_equal(res@listData, lapply(spctra, smooth)) }) +test_that("combineSpectra,Spectra works", { + set.seed(123) + mzs <- seq(1, 20, 0.1) + ints1 <- abs(rnorm(length(mzs), 10)) + ints1[11:20] <- c(15, 30, 90, 200, 500, 300, 100, 70, 40, 20) # add peak + ints2 <- abs(rnorm(length(mzs), 10)) + ints2[11:20] <- c(15, 30, 60, 120, 300, 200, 90, 60, 30, 23) + ints3 <- abs(rnorm(length(mzs), 10)) + ints3[11:20] <- c(13, 20, 50, 100, 200, 100, 80, 40, 30, 20) + + ## Create the spectra. + sp1 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), + intensity = ints1, rt = 1) + sp2 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.01), + intensity = ints2, rt = 2) + sp3 <- new("Spectrum1", mz = mzs + rnorm(length(mzs), sd = 0.009), + intensity = ints3, rt = 3) + sp4 <- new("Spectrum2", mz = 1:3, intensity = c(4, 8, 1)) + sp5 <- new("Spectrum2", mz = 2:4, intensity = c(5, 8, 2)) + + spctra <- Spectra(sp1, sp2, sp3, + elementMetadata = DataFrame(idx = 1:3, + group = c("b", "a", "a"))) + + res <- combineSpectra(spctra) + expect_true(length(res) == 1) + expect_equal(rtime(res), c(`1` = 1)) + expect_equal(mcols(res), DataFrame(idx = 1, group = "b", row.names = "1")) + + names(spctra) <- c("A", "B", "C") + res <- combineSpectra(spctra) + + expect_error(combineSpectra(spctra, fcol = "other")) + res <- combineSpectra(spctra, fcol = "group", mzd = 0.05) + expect_equal(lengths(intensity(res)), c(b = 191, a = 191)) + expect_equal(names(res), c("b", "a")) + expect_equal(res[[1]], sp1) + expect_equal(mcols(res), DataFrame(idx = c(1, 2), group = c("b", "a"), + row.names = c("b", "a"))) + + spctra <- Spectra(sp1, sp2, sp3, sp4, sp5, + elementMetadata = DataFrame(group = c("a", "b", "a", + "c", "c"))) + expect_error(combineSpectra(spctra)) + res <- combineSpectra(spctra, fcol = "group", mzd = 0.05) + expect_equal(names(res), c("a", "b", "c")) + expect_equal(res[[2]], sp2) + expect_equal(rtime(res), c(a = 1, b = 2, c = NA)) + expect_equal(msLevel(res), c(a = 1, b = 1, c = 2)) + expect_equal(intensity(res)[[3]], c(4, mean(c(8, 5)), mean(c(1, 8)), 2)) +}) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 8287fdf81..15fd72443 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -584,3 +584,17 @@ test_that("merging and expanding features", { k2 <- grep("^svm", fvarLabels(hl2), value = TRUE) expect_identical(fData(hyperLOPIT2015)[, k], fData(hl2)[, k2]) }) + +test_that("levelIndex works", { + str <- c("a", "a", "b", "a", "b", "c", "c", "b", "d", "d", "d") + res <- levelIndex(str, which = "middle") + expect_equal(res, c(a = 2, b = 5, c = 6, d = 10)) + res <- levelIndex(factor(str, levels = c("b", "a", "c", "d")), "middle") + expect_equal(res, c(b = 5, a = 2, c = 6, d = 10)) + expect_error(levelIndex(str, "a")) + + res <- levelIndex(str, which = "first") + expect_equal(res, c(a = 1, b = 3, c = 6, d = 9)) + res <- levelIndex(str, which = "last") + expect_equal(res, c(a = 4, b = 8, c = 7, d = 11)) +})