Skip to content

Commit

Permalink
Merge branch 'Devel' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Catalina Vallejos authored Oct 5, 2017
2 parents 1d7711d + 16e9d70 commit af2672a
Show file tree
Hide file tree
Showing 10 changed files with 489 additions and 433 deletions.
19 changes: 19 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,25 @@ version 0.99.5: (2017-09-26)
version 0.99.6: (2017-10-05) - After @nturaga review
- `inst` folder has been deleted
- Removal of `...` arg in `segments` - `plot` method for `BASiCS_Summary`

No-spikes implementation
- `makeExampleBASiCS_Data` + `newBASiCS_Data` adapted to no-spikes case
- Temporary vignette file for no-spikes case has been created
- `HiddenBASiCS_MCMCcppNoSpikes` updated to recycle vertical integration
update functions
- Fixed bug to allow use of stochastic reference
- Fixed bug in argument order for no-spikes version
- Spikes version code clean-up
Summary of constrain types
- ConstrainType: full or subset of genes
* For the second case, different filter thresholds have been proposed
- StochasticRef or no
- Idea:
* Try StochasticRef with no filter
* Try StochasticRef with filter > 0 total counts

TO-DO
- Implement Giacomo's suggestion?

- DEVELOPMENT NOTES (potential future changes):
* Delete BASiCS_VarianceDecomp?
Expand Down
73 changes: 21 additions & 52 deletions R/BASiCS_MCMC.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,59 +343,28 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {
# 1: Full constrain; 2: Non-zero genes only
ConstrainType <- ifelse("ConstrainType" %in% names(args),
args$ConstrainType, 2)
ConstrainLimit <- ifelse("ConstrainLimit" %in% names(args),
args$ConstrainLimit, 1)
ConstrainAlpha <- ifelse("ConstrainAlpha" %in% names(args),
args$ConstrainAlpha, 0.05)
ConstrainProb <- ifelse("ConstrainProb" %in% names(args),
args$ConstrainProb, 0.95)
StochasticRef <- ifelse("StochasticRef" %in% names(args),
args$StochasticRef, TRUE)

BatchDesign <- model.matrix(~as.factor(metadata(Data)$BatchInfo) - 1)
BatchSizes <- table(metadata(Data)$BatchInfo)
BatchIds <- as.numeric(names(BatchSizes))
BatchOffSet <- rep(1, times = nBatch)
for (k in 2:nBatch)
{
aux1 <- matrixStats::colSums2(assay(Data)[,
metadata(Data)$BatchInfo == BatchIds[k]])
aux2 <- colSums(assay(Data)[, metadata(Data)$BatchInfo == BatchIds[1]])
BatchOffSet[k] <- median(aux1) / median(aux2)
}

# Auxiliary vector contaning a gene index
Index <- (1:q.bio) - 1
# In the following '+1' is used as c++ vector indexes vectors setting
# '0' as its first element Constrain for gene-specific expression rates
if (ConstrainType == 1)
{
# Full constrain Note we use 'ConstrainLimit + 1' as 1
# pseudo-count was added
# when computing 'mu0' (to avoid numerical issues)
# Full constrain
ConstrainGene <- (1:q.bio) - 1
NotConstrainGene <- 0
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}
if (ConstrainType == 2)
{
# Trimmed constrain based on mean Note we use 'ConstrainLimit + 1'
# as 1 pseudo-count
# was added when computing 'mu0' (to avoid numerical issues)
ConstrainGene <- which(mu0 >= ConstrainLimit + 1) - 1
NotConstrainGene <- which(mu0 < ConstrainLimit + 1) - 1
# Trimmed constrain based on total counts > 0
ConstrainGene <- which(sum.bycell.bio > 0) - 1
NotConstrainGene <- which(sum.bycell.bio == 0) - 1
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}
if (ConstrainType == 3)
{
# Trimmed constrain based on detection
Detection <- rowMeans(assay(Data) > 0)
ConstrainGene <- which(Detection >= ConstrainLimit) - 1
NotConstrainGene <- which(Detection < ConstrainLimit) - 1
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}

StochasticRef <- ifelse("StochasticRef" %in% names(args),
args$StochasticRef, FALSE)


if (StochasticRef == TRUE)
{
aux.ref <- cbind(ConstrainGene,
Expand All @@ -418,40 +387,40 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {
as.matrix(assay(Data)),
BatchDesign,
mu0, delta0,
phi0, nu0, theta0,
phi0, nu0,
rep(theta0, nBatch),
PriorParam$s2.mu,
PriorParam$a.delta,
PriorParam$b.delta,
PriorParam$a.phi,
PriorParam$b.phi,
PriorParam$s2.delta,
PriorDeltaNum,
PriorParam$a.s,
PriorParam$b.s,
PriorParam$a.theta,
PriorParam$b.theta,
AR,
ls.mu0, ls.delta0,
ls.nu0, ls.theta0,
ls.nu0,
rep(ls.theta0, nBatch),
sum.bycell.all,
sum.bygene.all,
StoreAdaptNumber,
StopAdapt,
as.numeric(PrintProgress),
PriorParam$s2.delta,
PriorDeltaNum,
metadata(Data)$BatchInfo,
BatchIds,
as.vector(BatchSizes),
BatchOffSet, Constrain,
Constrain,
Index, RefGene, RefGenes,
ConstrainGene,
NotConstrainGene,
ConstrainType))
ConstrainType,
as.numeric(StochasticRef)))
}

Chain$mu <- Chain$mu[, 1:q.bio]
colnames(Chain$mu) <- rownames(assay(Data))[!isSpike(Data)]
colnames(Chain$delta) <- rownames(assay(Data))[!isSpike(Data)]
CellLabels <- paste0("Cell", 1:n, "_Batch", metadata(Data)$BatchInfo)
colnames(Chain$phi) <- CellLabels
if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$s) <- CellLabels }
colnames(Chain$s) <- CellLabels
if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$phi) <- CellLabels }
colnames(Chain$nu) <- CellLabels
colnames(Chain$theta) <- paste0("Batch", unique(metadata(Data)$BatchInfo))

Expand All @@ -468,7 +437,7 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {

if (length(metadata(Data)$SpikeInput) == 1)
{
Chain$s <- matrix(1, ncol = ncol(Chain$phi), nrow = nrow(Chain$phi))
Chain$phi <- matrix(1, ncol = ncol(Chain$s), nrow = nrow(Chain$s))
}

ChainClass <- newBASiCS_Chain(mu = Chain$mu, delta = Chain$delta,
Expand Down
1 change: 1 addition & 0 deletions R/BASiCS_TestDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ BASiCS_TestDE <- function(Chain1,
GenesSelect = NULL, ...)
{
# Checking validity of input arguments
# Test
if (!is(Chain1, "BASiCS_Chain"))
stop("'Chain1' is not a 'BASiCS_Chain' class object.")
if (!is(Chain2, "BASiCS_Chain"))
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ HiddenBASiCS_DenoisedRates <- function(CountsBio, Mu, TransInvDelta, PhiNu, N, q
.Call('_BASiCS_HiddenBASiCS_DenoisedRates', PACKAGE = 'BASiCS', CountsBio, Mu, TransInvDelta, PhiNu, N, q0, n)
}

HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType) {
.Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType)
HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef) {
.Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef)
}

29 changes: 14 additions & 15 deletions R/makeExampleBASiCS_Data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
WithSpikes = TRUE)
{
Mu <- c( 8.36, 10.65, 4.88, 6.29, 21.72, 12.93, 30.19, 83.92, 3.89, 6.34,
57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20,
57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20,
6.75, 27.74, 8.88, 21.21, 19.89, 7.14, 11.09, 7.19, 20.64, 73.9,
9.05, 6.13, 16.40, 6.08, 17.89, 6.98, 10.25, 14.05, 8.14, 5.67,
6.95, 11.16, 11.83, 7.56,159.05, 16.41, 4.58, 15.46, 10.96, 25.05,
18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28,
18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28,
3.91, 13.10, 8.23, 3.64, 80.77, 20.36, 3.47, 20.12, 13.29, 7.92,
25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36,
10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30,
25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36,
10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30,
7.47, 8.44, 4.24, 16.15, 23.39,120.22, 8.92, 97.15, 9.75, 10.07,
1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17,
4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79)
1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17,
4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79)
Delta <- c(1.29, 0.88, 1.51, 1.49, 0.54, 0.40, 0.85, 0.27, 0.53, 1.31,
0.26, 0.81, 0.72, 0.70, 0.96, 0.58, 1.15, 0.82, 0.25, 5.32,
1.13, 0.31, 0.66, 0.27, 0.76, 1.39, 1.18, 1.57, 0.55, 0.17,
Expand All @@ -59,14 +59,14 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
1.09, 1.16, 1.19, 1.14, 0.87, 1.10, 0.48, 1.06, 0.94, 0.97)
S <- c(0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37,
0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37)

Mu <- c(Mu[1:50], Mu[101:120])
Delta <- Delta[1:50]

q <- length(Mu)
q.bio <- length(Delta)
n <- length(Phi)

if (!WithBatch) { Theta <- 0.5 }
else
{
Expand All @@ -75,7 +75,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
Theta2 <- 0.75
Theta <- ifelse(seq_len(n) <= 10, Theta1, Theta2)
}

if (WithSpikes)
{
# Matrix where simulated counts will be stored
Expand Down Expand Up @@ -115,7 +115,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
rownames(Counts.sim) <- paste0("Gene", seq_len(q))
SpikeInfo <- data.frame(paste0("Gene", seq(q.bio+1, q)),
Mu[seq(q.bio+1, q)])

if (!WithBatch)
{
Data <- newBASiCS_Data(Counts = Counts.sim,
Expand Down Expand Up @@ -160,12 +160,11 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
Counts.sim[i, ] <- rpois(n, lambda = Nu * Rho[i, ] * Mu[i])
}
rownames(Counts.sim) <- paste0("Gene", seq_len(q.bio))

Data <- newBASiCS_Data(Counts = Counts.sim,
Tech = rep(FALSE, q.bio),
SpikeInfo = 1,
BatchInfo = c(rep(1, 10), rep(2, 10)))

}
return(Data)
}
}
59 changes: 35 additions & 24 deletions R/newBASiCS_Data.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,54 +48,65 @@
#' @references
#' Vallejos, Marioni and Richardson (2015). PLoS Computational Biology.
#'
newBASiCS_Data <- function(Counts, Tech, SpikeInfo, BatchInfo = NULL)
newBASiCS_Data <- function(Counts,
Tech,
SpikeInfo = NULL,
BatchInfo = NULL)
{
# Validity checks for SpikeInfo
if (!is.data.frame(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")
if (data.table::is.data.table(SpikeInfo))
if(!is.null(SpikeInfo))
{
if (!is.data.frame(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")

if (data.table::is.data.table(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")
}
else
{
message("Spikes information is not provided.\n",
"Only the horizontal integration method can be used.")
}

if (is.null(BatchInfo)) { BatchInfo <- rep(1, times = ncol(Counts)) }

# Re-ordering genes
Counts <- as.matrix(rbind(Counts[!Tech, ], Counts[Tech, ]))
Tech <- c(Tech[!Tech], Tech[Tech])
GeneName <- rownames(Counts)

if (!is.null(SpikeInfo))
{
# Extracting spike-in input molecules in the correct order
if (sum(!(GeneName[Tech] %in% SpikeInfo[, 1])) > 0)
stop("'SpikeInfo' is missing information for some of the spikes")
stop("'SpikeInfo' is missing information for some of the spikes")
if (sum(!(SpikeInfo[, 1] %in% GeneName[Tech])) > 0)
stop("'SpikeInfo' includes spikes that are not in 'Counts'")
stop("'SpikeInfo' includes spikes that are not in 'Counts'")
matching <- match(GeneName[Tech], SpikeInfo[, 1])
SpikeInput <- SpikeInfo[matching, 2]
}
else { SpikeInput <- 1 }

# Checks to assess if the data contains the required information
errors <- HiddenChecksBASiCS_Data(Counts, Tech, SpikeInput,
GeneName, BatchInfo)
if (length(errors) > 0) stop(errors)

# Create a SingleCellExperiment data object
Data <- SingleCellExperiment::SingleCellExperiment(
assays = list(Counts = as.matrix(Counts)),
metadata = list(SpikeInput = SpikeInput,
BatchInfo = BatchInfo))
assays = list(Counts = as.matrix(Counts)),
metadata = list(SpikeInput = SpikeInput,
BatchInfo = BatchInfo))
isSpike(Data, "ERCC") <- Tech

message("\n", "NOTICE: BASiCS requires a pre-filtered dataset \n",
" - You must remove poor quality cells before hand \n",
" - We recommend to pre-filter lowly expressed transcripts. \n",
" Inclusion criteria may vary for each data. \n",
" For example, remove transcripts: \n",
" - with low total counts across of all of the cells \n",
" - that are only expressed in a few cells \n",
" (genes expressed in only 1 cell are not accepted) \n",
"\n BASiCS_Filter can be used for this purpose. \n")
" - You must remove poor quality cells before hand \n",
" - We recommend to pre-filter lowly expressed transcripts. \n",
" Inclusion criteria may vary for each data. \n",
" For example, remove transcripts: \n",
" - with low total counts across of all of the cells \n",
" - that are only expressed in a few cells \n",
" (genes expressed in only 1 cell are not accepted) \n",
"\n BASiCS_Filter can be used for this purpose. \n")

return(Data)
}
}
2 changes: 1 addition & 1 deletion man/newBASiCS_Data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit af2672a

Please sign in to comment.