Skip to content

Commit

Permalink
Assure feature CV feature variable names are unique when combining fe…
Browse files Browse the repository at this point in the history
…ature repeatedly - closes issue #303
  • Loading branch information
LaurentGatto committed Apr 4, 2018
1 parent 29ffec9 commit 96483fe
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 27 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Changes in version 2.5.10
<2018-03-26>.
- Centroiding information is retrieved from raw files (for mzML/mzXML files;.
see issue [#325](https://github.com/lgatto/MSnbase/issues/325) <2018-03-27>
- Assure feature CV feature variable names are unique when combining
feature repeatedly (see issue #303) <2018-04-04 Wed>

Changes in version 2.5.9
------------------------
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
<2018-03-26>.
- Centroiding information is retrieved from raw files (for mzML/mzXML files;.
see issue [#325](https://github.com/lgatto/MSnbase/issues/325) <2018-03-27>
- Assure feature CV feature variable names are unique when combining
feature repeatedly (see issue #303) <2018-04-04 Wed>

## Changes in version 2.5.9
- New combineSpectra, combineSpectraMovingWindow, estimateMzScattering and
Expand Down
16 changes: 15 additions & 1 deletion R/combineFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,22 @@ combineFeaturesV <- function(object, ## MSnSet
) {
groupBy <- as.character(groupBy)
if (cv) {
## New cv feature variable names
cvfvars <- paste("CV", sampleNames(object), sep = ".")
## If not already present, use as is (i.e no suffix needed)
if (!any(cvfvars %in% fvarLabels(object))) {
.suffix <- NULL
} else {
## Add a numeric suffix that isn't already in use
.suffix <- 0
while (any(cvfvars %in% fvarLabels(object))) {
.suffix <- .suffix + 1
cvfvars <- paste(cvfvars, .suffix, sep = ".")
}
}
cv.mat <- featureCV(object, groupBy = groupBy,
norm = cv.norm)
norm = cv.norm,
suffix = .suffix)
}
n1 <- nrow(object)
## !! order of features in matRes is defined by the groupBy factor !!
Expand Down
55 changes: 32 additions & 23 deletions R/functions-MSnSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,40 +50,49 @@ normalise_MSnSet <- function(object, method, ...) {
}


##' This function calculates the column-wise coefficient of variation (CV), i.e.
##' the ration between the standard deviation and the mean, for the features
##' in an \code{"\linkS4class{MSnSet}"}. The CVs are calculated for the groups
##' of features defined by \code{groupBy}. For groups defined by single features,
##' \code{NA} is returned.
##' This function calculates the column-wise coefficient of variation
##' (CV), i.e. the ration between the standard deviation and the
##' mean, for the features in an [`MSnSet`]. The CVs are calculated
##' for the groups of features defined by `groupBy`. For groups
##' defined by single features, `NA` is returned.
##'
##' @title Calculates coeffivient of variation for features
##' @param x An instance of class \code{"\linkS4class{MSnSet}"}.
##' @param groupBy An object of class \code{factor} defining how to summerise the features.
##' @param na.rm A \code{logical} defining whether missing values should be removed.
##' @param norm One of 'none' (default), 'sum', 'max', 'center.mean', 'center.median'
##' 'quantiles' or 'quantiles.robust' defining if and how the data should be normalised
##' prior to CV calculation. See \code{\link{normalise}} for more details.
##' @return A \code{matrix} of dimensions \code{length(levels(groupBy))} by \code{ncol(x)}
##' with the respecive CVs.
##' @author Laurent Gatto <lg390@@cam.ac.uk>,
##' Sebastian Gibb <mail@@sebastiangibb.de>
##' @seealso \code{\link{combineFeatures}}
##' @param x An instance of class [`MSnSet`].
##' @param groupBy An object of class `factor` defining how to
##' summarise the features.
##' @param na.rm A `logical(1)` defining whether missing values should
##' be removed.
##' @param norm One of normalisation methods applied prior to CV
##' calculation. See [normalise()] for more details. Here, the
##' default is `'none'`, i.e. no normalisation.
##' @param suffix A `character(1)` to be used to name the new CV
##' columns. Default is `NULL` to ignore this. This argument
##' should be set when CV values are already present in the
##' [`MSnSet`] feature variables.
##' @return A `matrix` of dimensions `length(levels(groupBy))` by
##' `ncol(x)` with the respecive CVs. The column names are formed
##' by pasting `CV.` and the sample names of object `x`, possibly
##' suffixed by `.suffix`.
##' @author Laurent Gatto and Sebastian Gibb
##' @seealso [combineFeatures()]
##' @md
##' @examples
##' data(msnset)
##' msnset <- msnset[1:4]
##' gb <- factor(rep(1:2, each = 2))
##' featureCV(msnset, gb)
##' featureCV(msnset, gb, suffix = "2")
featureCV <- function(x, groupBy, na.rm = TRUE,
norm = c("sum", "max", "none",
"center.mean", "center.median",
"quantiles", "quantiles.robust")) {
norm <- match.arg(norm)
norm = "none",
suffix = NULL) {
if (norm != "none")
x <- normalise(x, method = norm)

cv <- rowsd(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm) /
rowmean(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm)
colnames(cv) <- paste("CV", colnames(cv), sep=".")
cv <- rowsd(exprs(x), group = groupBy, reorder = TRUE, na.rm = na.rm) /
rowmean(exprs(x), group = groupBy, reorder = TRUE, na.rm = na.rm)
colnames(cv) <- paste("CV", colnames(cv), sep = ".")
if (!is.null(suffix))
colnames(cv) <- paste(colnames(cv), suffix, sep = ".")
cv
}

Expand Down
21 changes: 18 additions & 3 deletions tests/testthat/test_MSnSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ test_that("Combine MSnSet features: groupBy and fcol", {
x1 <- combineFeatures(msnset, groupBy = grp)
x2 <- combineFeatures(msnset, fcol = "k")
x2@processingData <- x1@processingData
expect_equal(x1, x2)
expect_equal(x1, x2)
})

test_that("Combine MSnSet features (V)", {
Expand Down Expand Up @@ -98,7 +98,7 @@ test_that("Combine MSnSet features (V)", {
matrix(c(5*4, 5*10, 5*4, 5*10), ncol = 2),
tolerance = .001,
check.attributes = FALSE)

expect_true(all(fData(bb)[, 1] == c("A", "B")))
expect_true(all(fData(bb)[, 2] == c("A.1", "B.6")))
gb2 <- factor(c("a", "c", "z", "a", "z", "b", "b", "a", "c", "a"))
Expand Down Expand Up @@ -181,6 +181,17 @@ test_that("Combine MSnSet features (L)", {
colMeans(exprs(ee)[sapply(L, function(l) any(grepl("C", l))), ]))
})

test_that("Combine MSnSet features repeatedly", {
data(msnset)
fv1 <- fvarLabels(msnset)
pep <- combineFeatures(msnset, groupBy = fData(msnset)$PeptideSequence,
cv = TRUE)
prot <- combineFeatures(pep, groupBy = fData(pep)$ProteinAccession,
cv = TRUE)
expect_identical(length(fv1) + 4L, length(fvarLabels(pep)))
expect_identical(length(fv1) + 8L, length(fvarLabels(prot)))
})

test_that("makeImpuritiesMatrix", {
i4 <- dir(system.file("extdata", package = "MSnbase"),
pattern = "iTRAQ4plexPurityCorrection",
Expand Down Expand Up @@ -290,6 +301,10 @@ test_that("featureCV", {
sd((exprs(m)/div)[ii]/mean((exprs(m)/div)[ii])) }),
nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2")))
expect_equal(featureCV(m, group=fData(m)$accession, norm="sum"), cv)

cv1 <- featureCV(m, group=fData(m)$accession)
cv2 <- featureCV(m, group=fData(m)$accession, suffix = "2")
expect_identical(colnames(cv1), sub("\\.2", "", colnames(cv2)))
})

context("MSnSet identification data")
Expand Down Expand Up @@ -429,7 +444,7 @@ test_that("nFeatures are added correctly", {
"'Protein.Group.Accessions.nFeatures' already present.")
expect_error(nFeatures(hyperLOPIT2015ms3r1psm, "foo"))
tmp <- fData(res)$Protein.Group.Accessions.nFeatures
names(tmp) <- fData(res)$Protein.Group.Accessions
names(tmp) <- fData(res)$Protein.Group.Accessions
sel <- !duplicated(names(tmp))
g <- tmp[sel]
expect_equivalent(g[sort(names(g))], k0[sort(names(k0))])
Expand Down

0 comments on commit 96483fe

Please sign in to comment.