Skip to content

Commit

Permalink
Revert "Merge branch 'Devel' into master"
Browse files Browse the repository at this point in the history
This reverts commit af2672a, reversing
changes made to 1d7711d.
  • Loading branch information
catavallejos committed Oct 5, 2017
1 parent af2672a commit 694e90a
Show file tree
Hide file tree
Showing 10 changed files with 433 additions and 489 deletions.
19 changes: 0 additions & 19 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -595,25 +595,6 @@ version 0.99.5: (2017-09-26)
version 0.99.6: (2017-10-05) - After @nturaga review
- `inst` folder has been deleted
- Removal of `...` arg in `segments` - `plot` method for `BASiCS_Summary`

No-spikes implementation
- `makeExampleBASiCS_Data` + `newBASiCS_Data` adapted to no-spikes case
- Temporary vignette file for no-spikes case has been created
- `HiddenBASiCS_MCMCcppNoSpikes` updated to recycle vertical integration
update functions
- Fixed bug to allow use of stochastic reference
- Fixed bug in argument order for no-spikes version
- Spikes version code clean-up
Summary of constrain types
- ConstrainType: full or subset of genes
* For the second case, different filter thresholds have been proposed
- StochasticRef or no
- Idea:
* Try StochasticRef with no filter
* Try StochasticRef with filter > 0 total counts

TO-DO
- Implement Giacomo's suggestion?

- DEVELOPMENT NOTES (potential future changes):
* Delete BASiCS_VarianceDecomp?
Expand Down
73 changes: 52 additions & 21 deletions R/BASiCS_MCMC.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,28 +343,59 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {
# 1: Full constrain; 2: Non-zero genes only
ConstrainType <- ifelse("ConstrainType" %in% names(args),
args$ConstrainType, 2)
StochasticRef <- ifelse("StochasticRef" %in% names(args),
args$StochasticRef, TRUE)
ConstrainLimit <- ifelse("ConstrainLimit" %in% names(args),
args$ConstrainLimit, 1)
ConstrainAlpha <- ifelse("ConstrainAlpha" %in% names(args),
args$ConstrainAlpha, 0.05)
ConstrainProb <- ifelse("ConstrainProb" %in% names(args),
args$ConstrainProb, 0.95)

BatchDesign <- model.matrix(~as.factor(metadata(Data)$BatchInfo) - 1)
BatchSizes <- table(metadata(Data)$BatchInfo)
BatchIds <- as.numeric(names(BatchSizes))
BatchOffSet <- rep(1, times = nBatch)
for (k in 2:nBatch)
{
aux1 <- matrixStats::colSums2(assay(Data)[,
metadata(Data)$BatchInfo == BatchIds[k]])
aux2 <- colSums(assay(Data)[, metadata(Data)$BatchInfo == BatchIds[1]])
BatchOffSet[k] <- median(aux1) / median(aux2)
}

# Auxiliary vector contaning a gene index
Index <- (1:q.bio) - 1
# In the following '+1' is used as c++ vector indexes vectors setting
# '0' as its first element Constrain for gene-specific expression rates
if (ConstrainType == 1)
{
# Full constrain
# Full constrain Note we use 'ConstrainLimit + 1' as 1
# pseudo-count was added
# when computing 'mu0' (to avoid numerical issues)
ConstrainGene <- (1:q.bio) - 1
NotConstrainGene <- 0
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}
if (ConstrainType == 2)
{
# Trimmed constrain based on total counts > 0
ConstrainGene <- which(sum.bycell.bio > 0) - 1
NotConstrainGene <- which(sum.bycell.bio == 0) - 1
# Trimmed constrain based on mean Note we use 'ConstrainLimit + 1'
# as 1 pseudo-count
# was added when computing 'mu0' (to avoid numerical issues)
ConstrainGene <- which(mu0 >= ConstrainLimit + 1) - 1
NotConstrainGene <- which(mu0 < ConstrainLimit + 1) - 1
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}

if (ConstrainType == 3)
{
# Trimmed constrain based on detection
Detection <- rowMeans(assay(Data) > 0)
ConstrainGene <- which(Detection >= ConstrainLimit) - 1
NotConstrainGene <- which(Detection < ConstrainLimit) - 1
Constrain <- mean(log(mu0[ConstrainGene + 1]))
}

StochasticRef <- ifelse("StochasticRef" %in% names(args),
args$StochasticRef, FALSE)

if (StochasticRef == TRUE)
{
aux.ref <- cbind(ConstrainGene,
Expand All @@ -387,40 +418,40 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {
as.matrix(assay(Data)),
BatchDesign,
mu0, delta0,
phi0, nu0,
rep(theta0, nBatch),
phi0, nu0, theta0,
PriorParam$s2.mu,
PriorParam$a.delta,
PriorParam$b.delta,
PriorParam$s2.delta,
PriorDeltaNum,
PriorParam$a.s,
PriorParam$b.s,
PriorParam$a.phi,
PriorParam$b.phi,
PriorParam$a.theta,
PriorParam$b.theta,
AR,
ls.mu0, ls.delta0,
ls.nu0,
rep(ls.theta0, nBatch),
ls.nu0, ls.theta0,
sum.bycell.all,
sum.bygene.all,
StoreAdaptNumber,
StopAdapt,
as.numeric(PrintProgress),
Constrain,
PriorParam$s2.delta,
PriorDeltaNum,
metadata(Data)$BatchInfo,
BatchIds,
as.vector(BatchSizes),
BatchOffSet, Constrain,
Index, RefGene, RefGenes,
ConstrainGene,
NotConstrainGene,
ConstrainType,
as.numeric(StochasticRef)))
ConstrainType))
}

Chain$mu <- Chain$mu[, 1:q.bio]
colnames(Chain$mu) <- rownames(assay(Data))[!isSpike(Data)]
colnames(Chain$delta) <- rownames(assay(Data))[!isSpike(Data)]
CellLabels <- paste0("Cell", 1:n, "_Batch", metadata(Data)$BatchInfo)
colnames(Chain$s) <- CellLabels
if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$phi) <- CellLabels }
colnames(Chain$phi) <- CellLabels
if (length(metadata(Data)$SpikeInput) > 1) { colnames(Chain$s) <- CellLabels }
colnames(Chain$nu) <- CellLabels
colnames(Chain$theta) <- paste0("Batch", unique(metadata(Data)$BatchInfo))

Expand All @@ -437,7 +468,7 @@ BASiCS_MCMC <- function(Data, N, Thin, Burn, ...) {

if (length(metadata(Data)$SpikeInput) == 1)
{
Chain$phi <- matrix(1, ncol = ncol(Chain$s), nrow = nrow(Chain$s))
Chain$s <- matrix(1, ncol = ncol(Chain$phi), nrow = nrow(Chain$phi))
}

ChainClass <- newBASiCS_Chain(mu = Chain$mu, delta = Chain$delta,
Expand Down
1 change: 0 additions & 1 deletion R/BASiCS_TestDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ BASiCS_TestDE <- function(Chain1,
GenesSelect = NULL, ...)
{
# Checking validity of input arguments
# Test
if (!is(Chain1, "BASiCS_Chain"))
stop("'Chain1' is not a 'BASiCS_Chain' class object.")
if (!is(Chain2, "BASiCS_Chain"))
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ HiddenBASiCS_DenoisedRates <- function(CountsBio, Mu, TransInvDelta, PhiNu, N, q
.Call('_BASiCS_HiddenBASiCS_DenoisedRates', PACKAGE = 'BASiCS', CountsBio, Mu, TransInvDelta, PhiNu, N, q0, n)
}

HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef) {
.Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, s0, nu0, theta0, s2mu, adelta, bdelta, s2delta, prior_delta, as, bs, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType, StochasticRef)
HiddenBASiCS_MCMCcppNoSpikes <- function(N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType) {
.Call('_BASiCS_HiddenBASiCS_MCMCcppNoSpikes', PACKAGE = 'BASiCS', N, Thin, Burn, Counts, BatchDesign, mu0, delta0, phi0, nu0, theta0, s2mu, adelta, bdelta, aphi, bphi, atheta, btheta, ar, LSmu0, LSdelta0, LSnu0, LStheta0, sumByCellAll, sumByGeneAll, StoreAdapt, EndAdapt, PrintProgress, s2delta, prior_delta, BatchInfo, BatchIds, BatchSizes, BatchOffSet, Constrain, Index, RefGene, RefGenes, ConstrainGene, NotConstrainGene, ConstrainType)
}

29 changes: 15 additions & 14 deletions R/makeExampleBASiCS_Data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
WithSpikes = TRUE)
{
Mu <- c( 8.36, 10.65, 4.88, 6.29, 21.72, 12.93, 30.19, 83.92, 3.89, 6.34,
57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20,
57.87, 12.45, 8.08, 7.31, 15.56, 15.91, 12.24, 15.96, 19.14, 4.20,
6.75, 27.74, 8.88, 21.21, 19.89, 7.14, 11.09, 7.19, 20.64, 73.9,
9.05, 6.13, 16.40, 6.08, 17.89, 6.98, 10.25, 14.05, 8.14, 5.67,
6.95, 11.16, 11.83, 7.56,159.05, 16.41, 4.58, 15.46, 10.96, 25.05,
18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28,
18.54, 8.5, 4.05, 5.37, 4.63, 4.08, 3.75, 5.02, 27.74, 10.28,
3.91, 13.10, 8.23, 3.64, 80.77, 20.36, 3.47, 20.12, 13.29, 7.92,
25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36,
10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30,
25.51,173.01, 27.71, 4.89, 33.10, 3.42, 6.19, 4.29, 5.19, 8.36,
10.27, 6.86, 5.72, 37.25, 3.82, 23.97, 5.80, 14.30, 29.07, 5.30,
7.47, 8.44, 4.24, 16.15, 23.39,120.22, 8.92, 97.15, 9.75, 10.07,
1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17,
4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79)
1010.72, 7.90, 31.59, 63.17, 1.97,252.68, 31.59, 31.59, 31.59, 63.17,
4042.89,4042.89, 3.95,126.34,252.68, 7.90, 15.79,126.34, 3.95, 15.79)
Delta <- c(1.29, 0.88, 1.51, 1.49, 0.54, 0.40, 0.85, 0.27, 0.53, 1.31,
0.26, 0.81, 0.72, 0.70, 0.96, 0.58, 1.15, 0.82, 0.25, 5.32,
1.13, 0.31, 0.66, 0.27, 0.76, 1.39, 1.18, 1.57, 0.55, 0.17,
Expand All @@ -59,14 +59,14 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
1.09, 1.16, 1.19, 1.14, 0.87, 1.10, 0.48, 1.06, 0.94, 0.97)
S <- c(0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37,
0.38, 0.40, 0.38, 0.39, 0.34, 0.39, 0.31, 0.39, 0.40, 0.37)

Mu <- c(Mu[1:50], Mu[101:120])
Delta <- Delta[1:50]

q <- length(Mu)
q.bio <- length(Delta)
n <- length(Phi)

if (!WithBatch) { Theta <- 0.5 }
else
{
Expand All @@ -75,7 +75,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
Theta2 <- 0.75
Theta <- ifelse(seq_len(n) <= 10, Theta1, Theta2)
}

if (WithSpikes)
{
# Matrix where simulated counts will be stored
Expand Down Expand Up @@ -115,7 +115,7 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
rownames(Counts.sim) <- paste0("Gene", seq_len(q))
SpikeInfo <- data.frame(paste0("Gene", seq(q.bio+1, q)),
Mu[seq(q.bio+1, q)])

if (!WithBatch)
{
Data <- newBASiCS_Data(Counts = Counts.sim,
Expand Down Expand Up @@ -160,11 +160,12 @@ makeExampleBASiCS_Data <- function(WithBatch = FALSE,
Counts.sim[i, ] <- rpois(n, lambda = Nu * Rho[i, ] * Mu[i])
}
rownames(Counts.sim) <- paste0("Gene", seq_len(q.bio))

Data <- newBASiCS_Data(Counts = Counts.sim,
Tech = rep(FALSE, q.bio),
SpikeInfo = 1,
BatchInfo = c(rep(1, 10), rep(2, 10)))

}
return(Data)
}
}
59 changes: 24 additions & 35 deletions R/newBASiCS_Data.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,65 +48,54 @@
#' @references
#' Vallejos, Marioni and Richardson (2015). PLoS Computational Biology.
#'
newBASiCS_Data <- function(Counts,
Tech,
SpikeInfo = NULL,
BatchInfo = NULL)
newBASiCS_Data <- function(Counts, Tech, SpikeInfo, BatchInfo = NULL)
{
# Validity checks for SpikeInfo
if(!is.null(SpikeInfo))
{
if (!is.data.frame(SpikeInfo))
if (!is.data.frame(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")
if (data.table::is.data.table(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")
}
else
{
message("Spikes information is not provided.\n",
"Only the horizontal integration method can be used.")
}

if (data.table::is.data.table(SpikeInfo))
stop("'SpikeInfo' must be a 'data.frame'")

if (is.null(BatchInfo)) { BatchInfo <- rep(1, times = ncol(Counts)) }

# Re-ordering genes
Counts <- as.matrix(rbind(Counts[!Tech, ], Counts[Tech, ]))
Tech <- c(Tech[!Tech], Tech[Tech])
GeneName <- rownames(Counts)

if (!is.null(SpikeInfo))
{
# Extracting spike-in input molecules in the correct order
if (sum(!(GeneName[Tech] %in% SpikeInfo[, 1])) > 0)
stop("'SpikeInfo' is missing information for some of the spikes")
stop("'SpikeInfo' is missing information for some of the spikes")
if (sum(!(SpikeInfo[, 1] %in% GeneName[Tech])) > 0)
stop("'SpikeInfo' includes spikes that are not in 'Counts'")
stop("'SpikeInfo' includes spikes that are not in 'Counts'")
matching <- match(GeneName[Tech], SpikeInfo[, 1])
SpikeInput <- SpikeInfo[matching, 2]
}
else { SpikeInput <- 1 }

# Checks to assess if the data contains the required information
errors <- HiddenChecksBASiCS_Data(Counts, Tech, SpikeInput,
GeneName, BatchInfo)
if (length(errors) > 0) stop(errors)

# Create a SingleCellExperiment data object
Data <- SingleCellExperiment::SingleCellExperiment(
assays = list(Counts = as.matrix(Counts)),
metadata = list(SpikeInput = SpikeInput,
BatchInfo = BatchInfo))
assays = list(Counts = as.matrix(Counts)),
metadata = list(SpikeInput = SpikeInput,
BatchInfo = BatchInfo))
isSpike(Data, "ERCC") <- Tech

message("\n", "NOTICE: BASiCS requires a pre-filtered dataset \n",
" - You must remove poor quality cells before hand \n",
" - We recommend to pre-filter lowly expressed transcripts. \n",
" Inclusion criteria may vary for each data. \n",
" For example, remove transcripts: \n",
" - with low total counts across of all of the cells \n",
" - that are only expressed in a few cells \n",
" (genes expressed in only 1 cell are not accepted) \n",
"\n BASiCS_Filter can be used for this purpose. \n")
" - You must remove poor quality cells before hand \n",
" - We recommend to pre-filter lowly expressed transcripts. \n",
" Inclusion criteria may vary for each data. \n",
" For example, remove transcripts: \n",
" - with low total counts across of all of the cells \n",
" - that are only expressed in a few cells \n",
" (genes expressed in only 1 cell are not accepted) \n",
"\n BASiCS_Filter can be used for this purpose. \n")

return(Data)
}
}
2 changes: 1 addition & 1 deletion man/newBASiCS_Data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 694e90a

Please sign in to comment.